From da56def7c2461804d613a522df32799ca7a54f62 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Tue, 26 Dec 2023 19:08:52 +0000 Subject: [PATCH] Auto-generate files after cl/593821827 --- php/ext/google/protobuf/php-upb.c | 2875 +++++++++++++------------ php/ext/google/protobuf/php-upb.h | 276 +-- ruby/ext/google/protobuf_c/ruby-upb.c | 2875 +++++++++++++------------ ruby/ext/google/protobuf_c/ruby-upb.h | 276 +-- upb/cmake/CMakeLists.txt | 3 +- 5 files changed, 3179 insertions(+), 3126 deletions(-) diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index 7ab0b5248e83..00301dd36814 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -12971,7 +12971,7 @@ static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, } static void _upb_Decoder_MungeInt32(wireval* val) { - if (!_upb_IsLittleEndian()) { + if (!UPB_PRIVATE(_upb_IsLittleEndian)()) { /* The next stage will memcpy(dst, &val, 4) */ val->uint32_val = val->uint64_val; } @@ -13191,7 +13191,7 @@ static const char* _upb_Decoder_DecodeFixedPacked( arr->UPB_PRIVATE(size) += count; // Note: if/when the decoder supports multi-buffer input, we will need to // handle buffer seams here. - if (_upb_IsLittleEndian()) { + if (UPB_PRIVATE(_upb_IsLittleEndian)()) { ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); } else { int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); @@ -13515,7 +13515,7 @@ const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, } uint64_t msg_head; memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); + msg_head = UPB_PRIVATE(_upb_BigEndian64)(msg_head); if (UPB_PRIVATE(_upb_MiniTable_RequiredMask)(m) & ~msg_head) { d->missing_required = true; } @@ -14144,1621 +14144,1624 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, upb_Message* msg, #undef OP_FIXPCK_LG2 #undef OP_VARPCK_LG2 -// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. -// Also the table size grows by 2x. -// -// Could potentially be ported to other 64-bit archs that pass at least six -// arguments in registers and have 8 unused high bits in pointers. -// -// The overall design is to create specialized functions for every possible -// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch -// to the specialized function as quickly as possible. +// We encode backwards, to avoid pre-computing lengths (one-pass encode). +#include +#include +#include +#include + // Must be last. -#if UPB_FASTTABLE +#define UPB_PB_VARINT_MAX_LEN 10 -// The standard set of arguments passed to each parsing function. -// Thanks to x86-64 calling conventions, these will stay in registers. -#define UPB_PARSE_PARAMS \ - upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ - uint64_t hasbits, uint64_t data +UPB_NOINLINE +static size_t encode_varint64(uint64_t val, char* buf) { + size_t i = 0; + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } while (val); + return i; +} -#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data +static uint32_t encode_zz32(int32_t n) { + return ((uint32_t)n << 1) ^ (n >> 31); +} +static uint64_t encode_zz64(int64_t n) { + return ((uint64_t)n << 1) ^ (n >> 63); +} -#define RETURN_GENERIC(m) \ - /* Uncomment either of these for debugging purposes. */ \ - /* fprintf(stderr, m); */ \ - /*__builtin_trap(); */ \ - return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); +typedef struct { + upb_EncodeStatus status; + jmp_buf err; + upb_Arena* arena; + char *buf, *ptr, *limit; + int options; + int depth; + _upb_mapsorter sorter; +} upb_encstate; -typedef enum { - CARD_s = 0, /* Singular (optional, non-repeated) */ - CARD_o = 1, /* Oneof */ - CARD_r = 2, /* Repeated */ - CARD_p = 3 /* Packed Repeated */ -} upb_card; +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; +} -UPB_NOINLINE -static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { - int overrun = data; - ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( - &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { + UPB_ASSERT(s != kUpb_EncodeStatus_Ok); + e->status = s; + UPB_LONGJMP(e->err, 1); } -UPB_FORCEINLINE -static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { - int overrun; - switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { - case kUpb_IsDoneStatus_Done: - *(uint32_t*)msg |= hasbits; // Sync hasbits. - const upb_MiniTable* m = decode_totablep(table); - return UPB_UNLIKELY(m->UPB_PRIVATE(required_count)) - ? _upb_Decoder_CheckRequired(d, ptr, msg, m) - : ptr; - case kUpb_IsDoneStatus_NotDone: - break; - case kUpb_IsDoneStatus_NeedFallback: - data = overrun; - UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); - } +UPB_NOINLINE +static void encode_growbuffer(upb_encstate* e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); - // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); -} + if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); -UPB_FORCEINLINE -static bool fastdecode_checktag(uint16_t data, int tagbytes) { - if (tagbytes == 1) { - return (data & 0xff) == 0; - } else { - return data == 0; + // We want previous data at the end, realloc() put it at the beginning. + // TODO: This is somewhat inefficient since we are copying twice. + // Maybe create a realloc() that copies to the end of the new buffer? + if (old_size > 0) { + memmove(new_buf + new_size - old_size, e->buf, old_size); } -} -UPB_FORCEINLINE -static const char* fastdecode_longsize(const char* ptr, int* size) { - int i; - UPB_ASSERT(*size & 0x80); - *size &= 0xff; - for (i = 0; i < 3; i++) { - ptr++; - size_t byte = (uint8_t)ptr[-1]; - *size += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; - } - ptr++; - size_t byte = (uint8_t)ptr[-1]; - // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected - // for a 32 bit varint. - if (UPB_UNLIKELY(byte >= 8)) return NULL; - *size += (byte - 1) << 28; - return ptr; + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; + + e->ptr -= bytes; } +/* Call to ensure that at least "bytes" bytes are available for writing at + * e->ptr. Returns false if the bytes could not be allocated. */ UPB_FORCEINLINE -static const char* fastdecode_delimited( - upb_Decoder* d, const char* ptr, - upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { - ptr++; - - // Sign-extend so varint greater than one byte becomes negative, causing - // fast delimited parse to fail. - int len = (int8_t)ptr[-1]; - - if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, - ctx)) { - // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. - // If it exceeds the buffer limit, limit/limit_ptr will change during - // sub-message parsing, so we need to preserve delta, not limit. - if (UPB_UNLIKELY(len & 0x80)) { - // Size varint >1 byte (length >= 128). - ptr = fastdecode_longsize(ptr, &len); - if (!ptr) { - // Corrupt wire format: size exceeded INT_MAX. - return NULL; - } - } - if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { - // Corrupt wire format: invalid limit. - return NULL; - } - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); - ptr = func(&d->input, ptr, ctx); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); +static void encode_reserve(upb_encstate* e, size_t bytes) { + if ((size_t)(e->ptr - e->buf) < bytes) { + encode_growbuffer(e, bytes); + return; } - return ptr; + + e->ptr -= bytes; } -/* singular, oneof, repeated field handling ***********************************/ +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static void encode_bytes(upb_encstate* e, const void* data, size_t len) { + if (len == 0) return; /* memcpy() with zero size is UB */ + encode_reserve(e, len); + memcpy(e->ptr, data, len); +} -typedef struct { - upb_Array* arr; - void* end; -} fastdecode_arr; +static void encode_fixed64(upb_encstate* e, uint64_t val) { + val = UPB_PRIVATE(_upb_BigEndian64)(val); + encode_bytes(e, &val, sizeof(uint64_t)); +} -typedef enum { - FD_NEXT_ATLIMIT, - FD_NEXT_SAMEFIELD, - FD_NEXT_OTHERFIELD -} fastdecode_next; +static void encode_fixed32(upb_encstate* e, uint32_t val) { + val = UPB_PRIVATE(_upb_BigEndian32)(val); + encode_bytes(e, &val, sizeof(uint32_t)); +} -typedef struct { - void* dst; - fastdecode_next next; - uint32_t tag; -} fastdecode_nextret; +UPB_NOINLINE +static void encode_longvarint(upb_encstate* e, uint64_t val) { + size_t len; + char* start; -UPB_FORCEINLINE -static void* fastdecode_resizearr(upb_Decoder* d, void* dst, - fastdecode_arr* farr, int valbytes) { - if (UPB_UNLIKELY(dst == farr->end)) { - size_t old_capacity = farr->arr->UPB_PRIVATE(capacity); - size_t old_bytes = old_capacity * valbytes; - size_t new_capacity = old_capacity * 2; - size_t new_bytes = new_capacity * valbytes; - char* old_ptr = _upb_array_ptr(farr->arr); - char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - UPB_PRIVATE(_upb_Array_SetTaggedPtr)(farr->arr, new_ptr, elem_size_lg2); - farr->arr->UPB_PRIVATE(capacity) = new_capacity; - dst = (void*)(new_ptr + (old_capacity * valbytes)); - farr->end = (void*)(new_ptr + (new_capacity * valbytes)); - } - return dst; + encode_reserve(e, UPB_PB_VARINT_MAX_LEN); + len = encode_varint64(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; } UPB_FORCEINLINE -static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { - if (tagbytes == 1) { - return (uint8_t)tag == (uint8_t)data; +static void encode_varint(upb_encstate* e, uint64_t val) { + if (val < 128 && e->ptr != e->buf) { + --e->ptr; + *e->ptr = val; } else { - return (uint16_t)tag == (uint16_t)data; + encode_longvarint(e, val); } } -UPB_FORCEINLINE -static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, - int valbytes) { - farr->arr->UPB_PRIVATE(size) = - (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; +static void encode_double(upb_encstate* e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + encode_fixed64(e, u64); } -UPB_FORCEINLINE -static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, - const char** ptr, - fastdecode_arr* farr, - uint64_t data, int tagbytes, - int valbytes) { - fastdecode_nextret ret; - dst = (char*)dst + valbytes; +static void encode_float(upb_encstate* e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + encode_fixed32(e, u32); +} - if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { - ret.tag = _upb_FastDecoder_LoadTag(*ptr); - if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { - ret.next = FD_NEXT_SAMEFIELD; - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_OTHERFIELD; +static void encode_tag(upb_encstate* e, uint32_t field_number, + uint8_t wire_type) { + encode_varint(e, (field_number << 3) | wire_type); +} + +static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, + size_t elem_size, uint32_t tag) { + size_t bytes = arr->UPB_PRIVATE(size) * elem_size; + const char* data = _upb_array_constptr(arr); + const char* ptr = data + bytes - elem_size; + + if (tag || !UPB_PRIVATE(_upb_IsLittleEndian)()) { + while (true) { + if (elem_size == 4) { + uint32_t val; + memcpy(&val, ptr, sizeof(val)); + val = UPB_PRIVATE(_upb_BigEndian32)(val); + encode_bytes(e, &val, elem_size); + } else { + UPB_ASSERT(elem_size == 8); + uint64_t val; + memcpy(&val, ptr, sizeof(val)); + val = UPB_PRIVATE(_upb_BigEndian64)(val); + encode_bytes(e, &val, elem_size); + } + + if (tag) encode_varint(e, tag); + if (ptr == data) break; + ptr -= elem_size; } } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_ATLIMIT; + encode_bytes(e, data, bytes); } - - ret.dst = dst; - return ret; } -UPB_FORCEINLINE -static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { - size_t ofs = data >> 48; - return (char*)msg + ofs; +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size); + +static void encode_TaggedMessagePtr(upb_encstate* e, + upb_TaggedMessagePtr tagged, + const upb_MiniTable* m, size_t* size) { + if (upb_TaggedMessagePtr_IsEmpty(tagged)) { + m = UPB_PRIVATE(_upb_MiniTable_Empty)(); + } + encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); } -UPB_FORCEINLINE -static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, - upb_Message* msg, uint64_t* data, - uint64_t* hasbits, fastdecode_arr* farr, - int valbytes, upb_card card) { - switch (card) { - case CARD_s: { - uint8_t hasbit_index = *data >> 24; - // Set hasbit and return pointer to scalar field. - *hasbits |= 1ull << hasbit_index; - return fastdecode_fieldmem(msg, *data); +static void encode_scalar(upb_encstate* e, const void* _field_mem, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const char* field_mem = _field_mem; + int wire_type; + +#define CASE(ctype, type, wtype, encodeval) \ + { \ + ctype val = *(ctype*)field_mem; \ + encode_##type(e, encodeval); \ + wire_type = wtype; \ + break; \ + } + + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + CASE(double, double, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Float: + CASE(float, float, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + CASE(uint64_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_UInt32: + CASE(uint32_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Bool: + CASE(bool, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_SInt32: + CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); + case kUpb_FieldType_SInt64: + CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + upb_StringView view = *(upb_StringView*)field_mem; + encode_bytes(e, view.data, view.size); + encode_varint(e, view.size); + wire_type = kUpb_WireType_Delimited; + break; } - case CARD_o: { - uint16_t case_ofs = *data >> 32; - uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); - uint8_t field_number = *data >> 24; - *oneof_case = field_number; - return fastdecode_fieldmem(msg, *data); + case kUpb_FieldType_Group: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + if (submsg == 0) { + return; + } + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, submsg, subm, &size); + wire_type = kUpb_WireType_StartGroup; + e->depth++; + break; } - case CARD_r: { - // Get pointer to upb_Array and allocate/expand if necessary. - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - upb_Array** arr_p = fastdecode_fieldmem(msg, *data); - char* begin; - *(uint32_t*)msg |= *hasbits; - *hasbits = 0; - if (UPB_LIKELY(!*arr_p)) { - farr->arr = UPB_PRIVATE(_upb_Array_New)(&d->arena, 8, elem_size_lg2); - *arr_p = farr->arr; - } else { - farr->arr = *arr_p; + case kUpb_FieldType_Message: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + if (submsg == 0) { + return; } - begin = _upb_array_ptr(farr->arr); - farr->end = begin + (farr->arr->UPB_PRIVATE(capacity) * valbytes); - *data = _upb_FastDecoder_LoadTag(ptr); - return begin + (farr->arr->UPB_PRIVATE(size) * valbytes); + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_TaggedMessagePtr(e, submsg, subm, &size); + encode_varint(e, size); + wire_type = kUpb_WireType_Delimited; + e->depth++; + break; } default: UPB_UNREACHABLE(); } -} +#undef CASE -UPB_FORCEINLINE -static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { - *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. - return fastdecode_checktag(*data, tagbytes); + encode_tag(e, f->UPB_PRIVATE(number), wire_type); } -#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ - UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ - } \ - RETURN_GENERIC("packed check tag mismatch\n"); \ - } - -/* varint fields **************************************************************/ +static void encode_array(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Array* arr = *UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), upb_Array*); + bool packed = upb_MiniTableField_IsPacked(f); + size_t pre_len = e->limit - e->ptr; -UPB_FORCEINLINE -static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { - if (valbytes == 1) { - return val != 0; - } else if (zigzag) { - if (valbytes == 4) { - uint32_t n = val; - return (n >> 1) ^ -(int32_t)(n & 1); - } else if (valbytes == 8) { - return (val >> 1) ^ -(int64_t)(val & 1); - } - UPB_UNREACHABLE(); + if (arr == NULL || arr->UPB_PRIVATE(size) == 0) { + return; } - return val; -} -UPB_FORCEINLINE -static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { - ptr++; - *val = (uint8_t)ptr[-1]; - if (UPB_UNLIKELY(*val & 0x80)) { - int i; - for (i = 0; i < 8; i++) { - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - *val += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) goto done; - } - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - if (byte > 1) { - return NULL; - } - *val += (byte - 1) << 63; - } -done: - UPB_ASSUME(ptr != NULL); - return ptr; -} +#define VARINT_CASE(ctype, encode) \ + { \ + const ctype* start = _upb_array_constptr(arr); \ + const ctype* ptr = start + arr->UPB_PRIVATE(size); \ + uint32_t tag = \ + packed ? 0 : (f->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint; \ + do { \ + ptr--; \ + encode_varint(e, encode); \ + if (tag) encode_varint(e, tag); \ + } while (ptr != start); \ + } \ + break; -#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed) \ - uint64_t val; \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_varint64(ptr, &val); \ - if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - val = fastdecode_munge(val, valbytes, zigzag); \ - memcpy(dst, &val, valbytes); \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); - -typedef struct { - uint8_t valbytes; - bool zigzag; - void* dst; - fastdecode_arr farr; -} fastdecode_varintdata; - -UPB_FORCEINLINE -static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_varintdata* data = ctx; - void* dst = data->dst; - uint64_t val; +#define TAG(wire_type) (packed ? 0 : (f->UPB_PRIVATE(number) << 3 | wire_type)) - while (!_upb_Decoder_IsDone(d, &ptr)) { - dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return NULL; - val = fastdecode_munge(val, data->valbytes, data->zigzag); - memcpy(dst, &val, data->valbytes); - dst = (char*)dst + data->valbytes; + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Float: + encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + VARINT_CASE(uint64_t, *ptr); + case kUpb_FieldType_UInt32: + VARINT_CASE(uint32_t, *ptr); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + VARINT_CASE(int32_t, (int64_t)*ptr); + case kUpb_FieldType_Bool: + VARINT_CASE(bool, *ptr); + case kUpb_FieldType_SInt32: + VARINT_CASE(int32_t, encode_zz32(*ptr)); + case kUpb_FieldType_SInt64: + VARINT_CASE(int64_t, encode_zz64(*ptr)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + const upb_StringView* start = _upb_array_constptr(arr); + const upb_StringView* ptr = start + arr->UPB_PRIVATE(size); + do { + ptr--; + encode_bytes(e, ptr->data, ptr->size); + encode_varint(e, ptr->size); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); + } while (ptr != start); + return; + } + case kUpb_FieldType_Group: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->UPB_PRIVATE(size); + const upb_MiniTable* subm = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_StartGroup); + } while (ptr != start); + e->depth++; + return; + } + case kUpb_FieldType_Message: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->UPB_PRIVATE(size); + const upb_MiniTable* subm = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_varint(e, size); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); + } while (ptr != start); + e->depth++; + return; + } } +#undef VARINT_CASE - fastdecode_commitarr(dst, &data->farr, data->valbytes); - return ptr; + if (packed) { + encode_varint(e, e->limit - e->ptr - pre_len); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); + } } -#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked) \ - fastdecode_varintdata ctx = {valbytes, zigzag}; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ - \ - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ - valbytes, CARD_r); \ - if (UPB_UNLIKELY(!ctx.dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ - \ - if (UPB_UNLIKELY(ptr == NULL)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); +static void encode_mapentry(upb_encstate* e, uint32_t number, + const upb_MiniTable* layout, + const upb_MapEntry* ent) { + const upb_MiniTableField* key_field = &layout->UPB_PRIVATE(fields)[0]; + const upb_MiniTableField* val_field = &layout->UPB_PRIVATE(fields)[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->data.v, layout->UPB_PRIVATE(subs), val_field); + encode_scalar(e, &ent->data.k, layout->UPB_PRIVATE(subs), key_field); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, kUpb_WireType_Delimited); +} -#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed); \ - } +static void encode_map(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Map* map = *UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), const upb_Map*); + const upb_MiniTable* layout = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + UPB_ASSERT(layout->UPB_PRIVATE(field_count) == 2); -#define z_ZZ true -#define b_ZZ false -#define v_ZZ false + if (map == NULL) return; -/* Generate all combinations: - * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap( + &e->sorter, layout->UPB_PRIVATE(fields)[0].UPB_PRIVATE(descriptortype), + map, &sorted); + upb_MapEntry ent; + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->UPB_PRIVATE(number), layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + intptr_t iter = UPB_STRTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&map->table, &key, &val, &iter)) { + upb_MapEntry ent; + _upb_map_fromkey(key, &ent.data.k, map->key_size); + _upb_map_fromvalue(val, &ent.data.v, map->val_size); + encode_mapentry(e, f->UPB_PRIVATE(number), layout, &ent); + } + } +} -#define F(card, type, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, type##_ZZ, \ - upb_pr##type##valbytes##_##tagbytes##bt, \ - upb_pp##type##valbytes##_##tagbytes##bt); \ +static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + if (f->presence == 0) { + // Proto3 presence or map/array. + const void* mem = UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), void); + switch (UPB_PRIVATE(_upb_MiniTableField_GetRep)(f)) { + case kUpb_FieldRep_1Byte: { + char ch; + memcpy(&ch, mem, 1); + return ch != 0; + } + case kUpb_FieldRep_4Byte: { + uint32_t u32; + memcpy(&u32, mem, 4); + return u32 != 0; + } + case kUpb_FieldRep_8Byte: { + uint64_t u64; + memcpy(&u64, mem, 8); + return u64 != 0; + } + case kUpb_FieldRep_StringView: { + const upb_StringView* str = (const upb_StringView*)mem; + return str->size != 0; + } + default: + UPB_UNREACHABLE(); + } + } else if (f->presence > 0) { + // Proto2 presence: hasbit. + return UPB_PRIVATE(_upb_Message_GetHasbit)(msg, f); + } else { + // Field is in a oneof. + return UPB_PRIVATE(_upb_Message_GetOneofCase)(msg, f) == + f->UPB_PRIVATE(number); } +} -#define TYPES(card, tagbytes) \ - F(card, b, 1, tagbytes) \ - F(card, v, 4, tagbytes) \ - F(card, v, 8, tagbytes) \ - F(card, z, 4, tagbytes) \ - F(card, z, 8, tagbytes) - -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +static void encode_field(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field) { + switch (UPB_PRIVATE(_upb_MiniTableField_Mode)(field)) { + case kUpb_FieldMode_Array: + encode_array(e, msg, subs, field); + break; + case kUpb_FieldMode_Map: + encode_map(e, msg, subs, field); + break; + case kUpb_FieldMode_Scalar: + encode_scalar(e, UPB_PTR_AT(msg, field->UPB_PRIVATE(offset), void), subs, + field); + break; + default: + UPB_UNREACHABLE(); + } +} -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) +static void encode_msgset_item(upb_encstate* e, const upb_Extension* ext) { + size_t size; + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); + encode_message(e, ext->data.ptr, + upb_MiniTableExtension_GetSubMessage(ext->ext), &size); + encode_varint(e, size); + encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); + encode_varint(e, upb_MiniTableExtension_Number(ext->ext)); + encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); +} -#undef z_ZZ -#undef b_ZZ -#undef v_ZZ -#undef o_ONEOF -#undef s_ONEOF -#undef r_ONEOF -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDVARINT -#undef FASTDECODE_PACKEDVARINT -#undef FASTDECODE_VARINT +static void encode_ext(upb_encstate* e, const upb_Extension* ext, + bool is_message_set) { + if (UPB_UNLIKELY(is_message_set)) { + encode_msgset_item(e, ext); + } else { + encode_field(e, (upb_Message*)&ext->data, &ext->ext->UPB_PRIVATE(sub), + &ext->ext->UPB_PRIVATE(field)); + } +} -/* fixed fields ***************************************************************/ +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size) { + size_t pre_len = e->limit - e->ptr; -#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed) \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("couldn't allocate array in arena\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - memcpy(dst, ptr, valbytes); \ - ptr += valbytes; \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + if ((e->options & kUpb_EncodeOption_CheckRequired) && + m->UPB_PRIVATE(required_count)) { + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = UPB_PRIVATE(_upb_BigEndian64)(msg_head); + if (UPB_PRIVATE(_upb_MiniTable_RequiredMask)(m) & ~msg_head) { + encode_err(e, kUpb_EncodeStatus_MissingRequired); + } + } -#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked) \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ - \ - ptr += tagbytes; \ - int size = (uint8_t)ptr[0]; \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size) || \ - (size % valbytes) != 0)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ - upb_Array* arr = *arr_p; \ - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ - int elems = size / valbytes; \ - \ - if (UPB_LIKELY(!arr)) { \ - *arr_p = arr = \ - UPB_PRIVATE(_upb_Array_New)(&d->arena, elems, elem_size_lg2); \ - if (!arr) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - } else { \ - _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ - } \ - \ - char* dst = _upb_array_ptr(arr); \ - memcpy(dst, ptr, size); \ - arr->UPB_PRIVATE(size) = elems; \ - \ - ptr += size; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { + size_t unknown_size; + const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); -#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed); \ + if (unknown) { + encode_bytes(e, unknown, unknown_size); + } } -/* Generate all combinations: - * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ - -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ - upb_prf##valbytes##_##tagbytes##bt); \ + if (m->UPB_PRIVATE(ext) != kUpb_ExtMode_NonExtendable) { + /* Encode all extensions together. Unlike C++, we do not attempt to keep + * these in field number order relative to normal fields or even to each + * other. */ + size_t ext_count; + const upb_Extension* ext = + UPB_PRIVATE(_upb_Message_Getexts)(msg, &ext_count); + if (ext_count) { + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext, m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + const upb_Extension* end = ext + ext_count; + for (; ext != end; ext++) { + encode_ext(e, ext, m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); + } + } + } } -#define TYPES(card, tagbytes) \ - F(card, 4, tagbytes) \ - F(card, 8, tagbytes) + if (m->UPB_PRIVATE(field_count)) { + const upb_MiniTableField* f = + &m->UPB_PRIVATE(fields)[m->UPB_PRIVATE(field_count)]; + const upb_MiniTableField* first = &m->UPB_PRIVATE(fields)[0]; + while (f != first) { + f--; + if (encode_shouldencode(e, msg, m->UPB_PRIVATE(subs), f)) { + encode_field(e, msg, m->UPB_PRIVATE(subs), f); + } + } + } -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) + *size = (e->limit - e->ptr) - pre_len; +} -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) +static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, + const upb_Message* const msg, + const upb_MiniTable* const l, + char** const buf, + size_t* const size) { + // Unfortunately we must continue to perform hackery here because there are + // code paths which blindly copy the returned pointer without bothering to + // check for errors until much later (b/235839510). So we still set *buf to + // NULL on error and we still set it to non-NULL on a successful empty result. + if (UPB_SETJMP(encoder->err) == 0) { + encode_message(encoder, msg, l, size); + *size = encoder->limit - encoder->ptr; + if (*size == 0) { + static char ch; + *buf = &ch; + } else { + UPB_ASSERT(encoder->ptr); + *buf = encoder->ptr; + } + } else { + UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); + *buf = NULL; + *size = 0; + } -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDFIXED -#undef FASTDECODE_PACKEDFIXED + _upb_mapsorter_destroy(&encoder->sorter); + return encoder->status; +} -/* string fields **************************************************************/ +upb_EncodeStatus upb_Encode(const upb_Message* msg, const upb_MiniTable* l, + int options, upb_Arena* arena, char** buf, + size_t* size) { + upb_encstate e; + unsigned depth = (unsigned)options >> 16; -typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - const upb_MiniTable* table, - uint64_t hasbits, - upb_StringView* dst); + e.status = kUpb_EncodeStatus_Ok; + e.arena = arena; + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; + e.options = options; + _upb_mapsorter_init(&e.sorter); -UPB_NOINLINE -static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); - } - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + return upb_Encoder_Encode(&e, msg, l, buf, size); } -#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ - int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ - dst->size = 0; \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - const char* s_ptr = ptr; \ - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ - if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ - dst->data = s_ptr; \ - dst->size = size; \ - \ - if (validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } else { \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - } +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. +// Also the table size grows by 2x. +// +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. +// +// The overall design is to create specialized functions for every possible +// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch +// to the specialized function as quickly as possible. + + + +// Must be last. + +#if UPB_FASTTABLE + +// The standard set of arguments passed to each parsing function. +// Thanks to x86-64 calling conventions, these will stay in registers. +#define UPB_PARSE_PARAMS \ + upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ + uint64_t hasbits, uint64_t data + +#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. */ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ + return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + +typedef enum { + CARD_s = 0, /* Singular (optional, non-repeated) */ + CARD_o = 1, /* Oneof */ + CARD_r = 2, /* Repeated */ + CARD_p = 3 /* Packed Repeated */ +} upb_card; UPB_NOINLINE -static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); +static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; + ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( + &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); } -UPB_NOINLINE -static const char* fastdecode_longstring_noutf8( - struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +UPB_FORCEINLINE +static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { + int overrun; + switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { + case kUpb_IsDoneStatus_Done: + *(uint32_t*)msg |= hasbits; // Sync hasbits. + const upb_MiniTable* m = decode_totablep(table); + return UPB_UNLIKELY(m->UPB_PRIVATE(required_count)) + ? _upb_Decoder_CheckRequired(d, ptr, msg, m) + : ptr; + case kUpb_IsDoneStatus_NotDone: + break; + case kUpb_IsDoneStatus_NeedFallback: + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); + } + + // Read two bytes of tag data (for a one-byte tag, the high byte is junk). + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, - int copy, char* data, size_t data_offset, - upb_StringView* dst) { - d->arena.UPB_PRIVATE(ptr) += copy; - dst->data = data + data_offset; - UPB_UNPOISON_MEMORY_REGION(data, copy); - memcpy(data, ptr, copy); - UPB_POISON_MEMORY_REGION(data + data_offset + size, - copy - data_offset - size); +static bool fastdecode_checktag(uint16_t data, int tagbytes) { + if (tagbytes == 1) { + return (data & 0xff) == 0; + } else { + return data == 0; + } } -#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - card, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - size_t arena_has; \ - size_t common_has; \ - char* buf; \ - \ - UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (uint8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->size = size; \ - \ - buf = d->arena.UPB_PRIVATE(ptr); \ - arena_has = UPB_PRIVATE(_upb_ArenaHas)(&d->arena); \ - common_has = UPB_MIN(arena_has, \ - upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ - \ - if (UPB_LIKELY(size <= 15 - tagbytes)) { \ - if (arena_has < 16) goto longstr; \ - fastdecode_docopy(d, ptr - tagbytes - 1, size, 16, buf, tagbytes + 1, \ - dst); \ - } else if (UPB_LIKELY(size <= 32)) { \ - if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 32, buf, 0, dst); \ - } else if (UPB_LIKELY(size <= 64)) { \ - if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 64, buf, 0, dst); \ - } else if (UPB_LIKELY(size < 128)) { \ - if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 128, buf, 0, dst); \ - } else { \ - goto longstr; \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - \ - longstr: \ - if (card == CARD_r) { \ - fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ - } \ - ptr--; \ - if (validate_utf8) { \ - UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } else { \ - UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ +UPB_FORCEINLINE +static const char* fastdecode_longsize(const char* ptr, int* size) { + int i; + UPB_ASSERT(*size & 0x80); + *size &= 0xff; + for (i = 0; i < 3; i++) { + ptr++; + size_t byte = (uint8_t)ptr[-1]; + *size += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; } + ptr++; + size_t byte = (uint8_t)ptr[-1]; + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected + // for a 32 bit varint. + if (UPB_UNLIKELY(byte >= 8)) return NULL; + *size += (byte - 1) << 28; + return ptr; +} -#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - dst->data = ptr; \ - dst->size = size; \ - ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ - dst->size); \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); - -/* Generate all combinations: - * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ +UPB_FORCEINLINE +static const char* fastdecode_delimited( + upb_Decoder* d, const char* ptr, + upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { + ptr++; -#define s_VALIDATE true -#define b_VALIDATE false + // Sign-extend so varint greater than one byte becomes negative, causing + // fast delimited parse to fail. + int len = (int8_t)ptr[-1]; -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, type##_VALIDATE); \ - } \ - const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ + if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, + ctx)) { + // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. + // If it exceeds the buffer limit, limit/limit_ptr will change during + // sub-message parsing, so we need to preserve delta, not limit. + if (UPB_UNLIKELY(len & 0x80)) { + // Size varint >1 byte (length >= 128). + ptr = fastdecode_longsize(ptr, &len); + if (!ptr) { + // Corrupt wire format: size exceeded INT_MAX. + return NULL; + } + } + if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { + // Corrupt wire format: invalid limit. + return NULL; + } + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); + ptr = func(&d->input, ptr, ctx); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } + return ptr; +} -#define UTF8(card, tagbytes) \ - F(card, tagbytes, s) \ - F(card, tagbytes, b) +/* singular, oneof, repeated field handling ***********************************/ -#define TAGBYTES(card) \ - UTF8(card, 1) \ - UTF8(card, 2) +typedef struct { + upb_Array* arr; + void* end; +} fastdecode_arr; -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) +typedef enum { + FD_NEXT_ATLIMIT, + FD_NEXT_SAMEFIELD, + FD_NEXT_OTHERFIELD +} fastdecode_next; -#undef s_VALIDATE -#undef b_VALIDATE -#undef F -#undef TAGBYTES -#undef FASTDECODE_LONGSTRING -#undef FASTDECODE_COPYSTRING -#undef FASTDECODE_STRING +typedef struct { + void* dst; + fastdecode_next next; + uint32_t tag; +} fastdecode_nextret; -/* message fields *************************************************************/ +UPB_FORCEINLINE +static void* fastdecode_resizearr(upb_Decoder* d, void* dst, + fastdecode_arr* farr, int valbytes) { + if (UPB_UNLIKELY(dst == farr->end)) { + size_t old_capacity = farr->arr->UPB_PRIVATE(capacity); + size_t old_bytes = old_capacity * valbytes; + size_t new_capacity = old_capacity * 2; + size_t new_bytes = new_capacity * valbytes; + char* old_ptr = _upb_array_ptr(farr->arr); + char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + UPB_PRIVATE(_upb_Array_SetTaggedPtr)(farr->arr, new_ptr, elem_size_lg2); + farr->arr->UPB_PRIVATE(capacity) = new_capacity; + dst = (void*)(new_ptr + (old_capacity * valbytes)); + farr->end = (void*)(new_ptr + (new_capacity * valbytes)); + } + return dst; +} -UPB_INLINE -upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* m, - int msg_ceil_bytes) { - size_t size = m->UPB_PRIVATE(size) + sizeof(upb_Message_Internal); - char* msg_data; - if (UPB_LIKELY(msg_ceil_bytes > 0 && - UPB_PRIVATE(_upb_ArenaHas)(&d->arena) >= msg_ceil_bytes)) { - UPB_ASSERT(size <= (size_t)msg_ceil_bytes); - msg_data = d->arena.UPB_PRIVATE(ptr); - d->arena.UPB_PRIVATE(ptr) += size; - UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); - memset(msg_data, 0, msg_ceil_bytes); - UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); +UPB_FORCEINLINE +static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { + if (tagbytes == 1) { + return (uint8_t)tag == (uint8_t)data; } else { - msg_data = (char*)upb_Arena_Malloc(&d->arena, size); - memset(msg_data, 0, size); + return (uint16_t)tag == (uint16_t)data; } - return msg_data + sizeof(upb_Message_Internal); } -typedef struct { - intptr_t table; - upb_Message* msg; -} fastdecode_submsgdata; +UPB_FORCEINLINE +static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, + int valbytes) { + farr->arr->UPB_PRIVATE(size) = + (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; +} UPB_FORCEINLINE -static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_submsgdata* submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); - UPB_ASSUME(ptr != NULL); - return ptr; +static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, + const char** ptr, + fastdecode_arr* farr, + uint64_t data, int tagbytes, + int valbytes) { + fastdecode_nextret ret; + dst = (char*)dst + valbytes; + + if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { + ret.tag = _upb_FastDecoder_LoadTag(*ptr); + if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { + ret.next = FD_NEXT_SAMEFIELD; + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_OTHERFIELD; + } + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_ATLIMIT; + } + + ret.dst = dst; + return ret; } -#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ - msg_ceil_bytes, card) \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("submessage field tag mismatch\n"); \ - } \ - \ - if (--d->depth == 0) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ - } \ - \ - upb_Message** dst; \ - uint32_t submsg_idx = (data >> 16) & 0xff; \ - const upb_MiniTable* tablep = decode_totablep(table); \ - const upb_MiniTable* subtablep = upb_MiniTableSub_Message( \ - *UPB_PRIVATE(_upb_MiniTable_GetSubByIndex)(tablep, submsg_idx)); \ - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ - fastdecode_arr farr; \ - \ - if (subtablep->UPB_PRIVATE(table_mask) == (uint8_t)-1) { \ - d->depth++; \ - RETURN_GENERIC("submessage doesn't have fast tables."); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_Message*), card); \ - \ - if (card == CARD_s) { \ - *(uint32_t*)msg |= hasbits; \ - hasbits = 0; \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ - } \ - \ - submsg.msg = *dst; \ - \ - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ - \ - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - d->depth++; \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - d->depth++; \ - return ptr; \ - } \ - } \ - \ - d->depth++; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ - CARD_##card); \ - } - -#define SIZES(card, tagbytes) \ - F(card, tagbytes, 64, 64) \ - F(card, tagbytes, 128, 128) \ - F(card, tagbytes, 192, 192) \ - F(card, tagbytes, 256, 256) \ - F(card, tagbytes, max, -1) - -#define TAGBYTES(card) \ - SIZES(card, 1) \ - SIZES(card, 2) - -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) - -#undef TAGBYTES -#undef SIZES -#undef F -#undef FASTDECODE_SUBMSG - -#endif /* UPB_FASTTABLE */ - -// We encode backwards, to avoid pre-computing lengths (one-pass encode). - - -#include -#include -#include -#include - - -// Must be last. - -#define UPB_PB_VARINT_MAX_LEN 10 - -UPB_NOINLINE -static size_t encode_varint64(uint64_t val, char* buf) { - size_t i = 0; - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } while (val); - return i; -} - -static uint32_t encode_zz32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} -static uint64_t encode_zz64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); +UPB_FORCEINLINE +static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { + size_t ofs = data >> 48; + return (char*)msg + ofs; } -typedef struct { - upb_EncodeStatus status; - jmp_buf err; - upb_Arena* arena; - char *buf, *ptr, *limit; - int options; - int depth; - _upb_mapsorter sorter; -} upb_encstate; - -static size_t upb_roundup_pow2(size_t bytes) { - size_t ret = 128; - while (ret < bytes) { - ret *= 2; +UPB_FORCEINLINE +static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, + upb_Message* msg, uint64_t* data, + uint64_t* hasbits, fastdecode_arr* farr, + int valbytes, upb_card card) { + switch (card) { + case CARD_s: { + uint8_t hasbit_index = *data >> 24; + // Set hasbit and return pointer to scalar field. + *hasbits |= 1ull << hasbit_index; + return fastdecode_fieldmem(msg, *data); + } + case CARD_o: { + uint16_t case_ofs = *data >> 32; + uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); + uint8_t field_number = *data >> 24; + *oneof_case = field_number; + return fastdecode_fieldmem(msg, *data); + } + case CARD_r: { + // Get pointer to upb_Array and allocate/expand if necessary. + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + upb_Array** arr_p = fastdecode_fieldmem(msg, *data); + char* begin; + *(uint32_t*)msg |= *hasbits; + *hasbits = 0; + if (UPB_LIKELY(!*arr_p)) { + farr->arr = UPB_PRIVATE(_upb_Array_New)(&d->arena, 8, elem_size_lg2); + *arr_p = farr->arr; + } else { + farr->arr = *arr_p; + } + begin = _upb_array_ptr(farr->arr); + farr->end = begin + (farr->arr->UPB_PRIVATE(capacity) * valbytes); + *data = _upb_FastDecoder_LoadTag(ptr); + return begin + (farr->arr->UPB_PRIVATE(size) * valbytes); + } + default: + UPB_UNREACHABLE(); } - return ret; } -UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { - UPB_ASSERT(s != kUpb_EncodeStatus_Ok); - e->status = s; - UPB_LONGJMP(e->err, 1); +UPB_FORCEINLINE +static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { + *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. + return fastdecode_checktag(*data, tagbytes); } -UPB_NOINLINE -static void encode_growbuffer(upb_encstate* e, size_t bytes) { - size_t old_size = e->limit - e->buf; - size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); - char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); - - if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); - - // We want previous data at the end, realloc() put it at the beginning. - // TODO: This is somewhat inefficient since we are copying twice. - // Maybe create a realloc() that copies to the end of the new buffer? - if (old_size > 0) { - memmove(new_buf + new_size - old_size, e->buf, old_size); +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ } - e->ptr = new_buf + new_size - (e->limit - e->ptr); - e->limit = new_buf + new_size; - e->buf = new_buf; - - e->ptr -= bytes; -} +/* varint fields **************************************************************/ -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. */ UPB_FORCEINLINE -static void encode_reserve(upb_encstate* e, size_t bytes) { - if ((size_t)(e->ptr - e->buf) < bytes) { - encode_growbuffer(e, bytes); - return; +static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { + if (valbytes == 1) { + return val != 0; + } else if (zigzag) { + if (valbytes == 4) { + uint32_t n = val; + return (n >> 1) ^ -(int32_t)(n & 1); + } else if (valbytes == 8) { + return (val >> 1) ^ -(int64_t)(val & 1); + } + UPB_UNREACHABLE(); } - - e->ptr -= bytes; -} - -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static void encode_bytes(upb_encstate* e, const void* data, size_t len) { - if (len == 0) return; /* memcpy() with zero size is UB */ - encode_reserve(e, len); - memcpy(e->ptr, data, len); -} - -static void encode_fixed64(upb_encstate* e, uint64_t val) { - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, sizeof(uint64_t)); -} - -static void encode_fixed32(upb_encstate* e, uint32_t val) { - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, sizeof(uint32_t)); -} - -UPB_NOINLINE -static void encode_longvarint(upb_encstate* e, uint64_t val) { - size_t len; - char* start; - - encode_reserve(e, UPB_PB_VARINT_MAX_LEN); - len = encode_varint64(val, e->ptr); - start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; - memmove(start, e->ptr, len); - e->ptr = start; + return val; } UPB_FORCEINLINE -static void encode_varint(upb_encstate* e, uint64_t val) { - if (val < 128 && e->ptr != e->buf) { - --e->ptr; - *e->ptr = val; - } else { - encode_longvarint(e, val); +static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { + ptr++; + *val = (uint8_t)ptr[-1]; + if (UPB_UNLIKELY(*val & 0x80)) { + int i; + for (i = 0; i < 8; i++) { + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + *val += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; + } + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + if (byte > 1) { + return NULL; + } + *val += (byte - 1) << 63; } +done: + UPB_ASSUME(ptr != NULL); + return ptr; } -static void encode_double(upb_encstate* e, double d) { - uint64_t u64; - UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); - memcpy(&u64, &d, sizeof(uint64_t)); - encode_fixed64(e, u64); -} - -static void encode_float(upb_encstate* e, float d) { - uint32_t u32; - UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); - memcpy(&u32, &d, sizeof(uint32_t)); - encode_fixed32(e, u32); -} - -static void encode_tag(upb_encstate* e, uint32_t field_number, - uint8_t wire_type) { - encode_varint(e, (field_number << 3) | wire_type); -} +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + val = fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); -static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, - size_t elem_size, uint32_t tag) { - size_t bytes = arr->UPB_PRIVATE(size) * elem_size; - const char* data = _upb_array_constptr(arr); - const char* ptr = data + bytes - elem_size; +typedef struct { + uint8_t valbytes; + bool zigzag; + void* dst; + fastdecode_arr farr; +} fastdecode_varintdata; - if (tag || !_upb_IsLittleEndian()) { - while (true) { - if (elem_size == 4) { - uint32_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, elem_size); - } else { - UPB_ASSERT(elem_size == 8); - uint64_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, elem_size); - } +UPB_FORCEINLINE +static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_varintdata* data = ctx; + void* dst = data->dst; + uint64_t val; - if (tag) encode_varint(e, tag); - if (ptr == data) break; - ptr -= elem_size; - } - } else { - encode_bytes(e, data, bytes); + while (!_upb_Decoder_IsDone(d, &ptr)) { + dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); + ptr = fastdecode_varint64(ptr, &val); + if (ptr == NULL) return NULL; + val = fastdecode_munge(val, data->valbytes, data->zigzag); + memcpy(dst, &val, data->valbytes); + dst = (char*)dst + data->valbytes; } -} -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size); - -static void encode_TaggedMessagePtr(upb_encstate* e, - upb_TaggedMessagePtr tagged, - const upb_MiniTable* m, size_t* size) { - if (upb_TaggedMessagePtr_IsEmpty(tagged)) { - m = UPB_PRIVATE(_upb_MiniTable_Empty)(); - } - encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); + fastdecode_commitarr(dst, &data->farr, data->valbytes); + return ptr; } -static void encode_scalar(upb_encstate* e, const void* _field_mem, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const char* field_mem = _field_mem; - int wire_type; - -#define CASE(ctype, type, wtype, encodeval) \ - { \ - ctype val = *(ctype*)field_mem; \ - encode_##type(e, encodeval); \ - wire_type = wtype; \ - break; \ - } +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - CASE(double, double, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Float: - CASE(float, float, kUpb_WireType_32Bit, val); - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - CASE(uint64_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_UInt32: - CASE(uint32_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); - case kUpb_FieldType_Bool: - CASE(bool, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_SInt32: - CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); - case kUpb_FieldType_SInt64: - CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - upb_StringView view = *(upb_StringView*)field_mem; - encode_bytes(e, view.data, view.size); - encode_varint(e, view.size); - wire_type = kUpb_WireType_Delimited; - break; - } - case kUpb_FieldType_Group: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, submsg, subm, &size); - wire_type = kUpb_WireType_StartGroup; - e->depth++; - break; - } - case kUpb_FieldType_Message: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_TaggedMessagePtr(e, submsg, subm, &size); - encode_varint(e, size); - wire_type = kUpb_WireType_Delimited; - e->depth++; - break; - } - default: - UPB_UNREACHABLE(); +#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ } -#undef CASE - encode_tag(e, f->UPB_PRIVATE(number), wire_type); -} +#define z_ZZ true +#define b_ZZ false +#define v_ZZ false -static void encode_array(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Array* arr = *UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), upb_Array*); - bool packed = upb_MiniTableField_IsPacked(f); - size_t pre_len = e->limit - e->ptr; +/* Generate all combinations: + * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ - if (arr == NULL || arr->UPB_PRIVATE(size) == 0) { - return; +#define F(card, type, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ } -#define VARINT_CASE(ctype, encode) \ - { \ - const ctype* start = _upb_array_constptr(arr); \ - const ctype* ptr = start + arr->UPB_PRIVATE(size); \ - uint32_t tag = \ - packed ? 0 : (f->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint; \ - do { \ - ptr--; \ - encode_varint(e, encode); \ - if (tag) encode_varint(e, tag); \ - } while (ptr != start); \ - } \ - break; +#define TYPES(card, tagbytes) \ + F(card, b, 1, tagbytes) \ + F(card, v, 4, tagbytes) \ + F(card, v, 8, tagbytes) \ + F(card, z, 4, tagbytes) \ + F(card, z, 8, tagbytes) -#define TAG(wire_type) (packed ? 0 : (f->UPB_PRIVATE(number) << 3 | wire_type)) +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Float: - encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - VARINT_CASE(uint64_t, *ptr); - case kUpb_FieldType_UInt32: - VARINT_CASE(uint32_t, *ptr); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - VARINT_CASE(int32_t, (int64_t)*ptr); - case kUpb_FieldType_Bool: - VARINT_CASE(bool, *ptr); - case kUpb_FieldType_SInt32: - VARINT_CASE(int32_t, encode_zz32(*ptr)); - case kUpb_FieldType_SInt64: - VARINT_CASE(int64_t, encode_zz64(*ptr)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - const upb_StringView* start = _upb_array_constptr(arr); - const upb_StringView* ptr = start + arr->UPB_PRIVATE(size); - do { - ptr--; - encode_bytes(e, ptr->data, ptr->size); - encode_varint(e, ptr->size); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); - } while (ptr != start); - return; - } - case kUpb_FieldType_Group: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->UPB_PRIVATE(size); - const upb_MiniTable* subm = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_StartGroup); - } while (ptr != start); - e->depth++; - return; - } - case kUpb_FieldType_Message: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->UPB_PRIVATE(size); - const upb_MiniTable* subm = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_varint(e, size); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); - } while (ptr != start); - e->depth++; - return; - } +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef z_ZZ +#undef b_ZZ +#undef v_ZZ +#undef o_ONEOF +#undef s_ONEOF +#undef r_ONEOF +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT + +/* fixed fields ***************************************************************/ + +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ + &d->input, ptr, size) || \ + (size % valbytes) != 0)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ + upb_Array* arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = \ + UPB_PRIVATE(_upb_Array_New)(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + } else { \ + _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ + } \ + \ + char* dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->UPB_PRIVATE(size) = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ } -#undef VARINT_CASE - if (packed) { - encode_varint(e, e->limit - e->ptr - pre_len); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); +/* Generate all combinations: + * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ + +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ } -} -static void encode_mapentry(upb_encstate* e, uint32_t number, - const upb_MiniTable* layout, - const upb_MapEntry* ent) { - const upb_MiniTableField* key_field = &layout->UPB_PRIVATE(fields)[0]; - const upb_MiniTableField* val_field = &layout->UPB_PRIVATE(fields)[1]; - size_t pre_len = e->limit - e->ptr; - size_t size; - encode_scalar(e, &ent->data.v, layout->UPB_PRIVATE(subs), val_field); - encode_scalar(e, &ent->data.k, layout->UPB_PRIVATE(subs), key_field); - size = (e->limit - e->ptr) - pre_len; - encode_varint(e, size); - encode_tag(e, number, kUpb_WireType_Delimited); -} +#define TYPES(card, tagbytes) \ + F(card, 4, tagbytes) \ + F(card, 8, tagbytes) -static void encode_map(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Map* map = *UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), const upb_Map*); - const upb_MiniTable* layout = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - UPB_ASSERT(layout->UPB_PRIVATE(field_count) == 2); +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) - if (map == NULL) return; +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushmap( - &e->sorter, layout->UPB_PRIVATE(fields)[0].UPB_PRIVATE(descriptortype), - map, &sorted); - upb_MapEntry ent; - while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { - encode_mapentry(e, f->UPB_PRIVATE(number), layout, &ent); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - intptr_t iter = UPB_STRTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&map->table, &key, &val, &iter)) { - upb_MapEntry ent; - _upb_map_fromkey(key, &ent.data.k, map->key_size); - _upb_map_fromvalue(val, &ent.data.v, map->val_size); - encode_mapentry(e, f->UPB_PRIVATE(number), layout, &ent); - } - } -} +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED -static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - if (f->presence == 0) { - // Proto3 presence or map/array. - const void* mem = UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), void); - switch (UPB_PRIVATE(_upb_MiniTableField_GetRep)(f)) { - case kUpb_FieldRep_1Byte: { - char ch; - memcpy(&ch, mem, 1); - return ch != 0; - } - case kUpb_FieldRep_4Byte: { - uint32_t u32; - memcpy(&u32, mem, 4); - return u32 != 0; - } - case kUpb_FieldRep_8Byte: { - uint64_t u64; - memcpy(&u64, mem, 8); - return u64 != 0; - } - case kUpb_FieldRep_StringView: { - const upb_StringView* str = (const upb_StringView*)mem; - return str->size != 0; - } - default: - UPB_UNREACHABLE(); - } - } else if (f->presence > 0) { - // Proto2 presence: hasbit. - return UPB_PRIVATE(_upb_Message_GetHasbit)(msg, f); - } else { - // Field is in a oneof. - return UPB_PRIVATE(_upb_Message_GetOneofCase)(msg, f) == - f->UPB_PRIVATE(number); +/* string fields **************************************************************/ + +typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + const upb_MiniTable* table, + uint64_t hasbits, + upb_StringView* dst); + +UPB_NOINLINE +static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); } + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); } -static void encode_field(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field) { - switch (UPB_PRIVATE(_upb_MiniTableField_Mode)(field)) { - case kUpb_FieldMode_Array: - encode_array(e, msg, subs, field); - break; - case kUpb_FieldMode_Map: - encode_map(e, msg, subs, field); - break; - case kUpb_FieldMode_Scalar: - encode_scalar(e, UPB_PTR_AT(msg, field->UPB_PRIVATE(offset), void), subs, - field); - break; - default: - UPB_UNREACHABLE(); +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ + dst->size = 0; \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + const char* s_ptr = ptr; \ + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ + if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ + dst->data = s_ptr; \ + dst->size = size; \ + \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ } + +UPB_NOINLINE +static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); } -static void encode_msgset_item(upb_encstate* e, const upb_Extension* ext) { - size_t size; - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); - encode_message(e, ext->data.ptr, - upb_MiniTableExtension_GetSubMessage(ext->ext), &size); - encode_varint(e, size); - encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); - encode_varint(e, upb_MiniTableExtension_Number(ext->ext)); - encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); +UPB_NOINLINE +static const char* fastdecode_longstring_noutf8( + struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +} + +UPB_FORCEINLINE +static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, + int copy, char* data, size_t data_offset, + upb_StringView* dst) { + d->arena.UPB_PRIVATE(ptr) += copy; + dst->data = data + data_offset; + UPB_UNPOISON_MEMORY_REGION(data, copy); + memcpy(data, ptr, copy); + UPB_POISON_MEMORY_REGION(data + data_offset + size, + copy - data_offset - size); } -static void encode_ext(upb_encstate* e, const upb_Extension* ext, - bool is_message_set) { - if (UPB_UNLIKELY(is_message_set)) { - encode_msgset_item(e, ext); - } else { - encode_field(e, (upb_Message*)&ext->data, &ext->ext->UPB_PRIVATE(sub), - &ext->ext->UPB_PRIVATE(field)); +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char* buf; \ + \ + UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.UPB_PRIVATE(ptr); \ + arena_has = UPB_PRIVATE(_upb_ArenaHas)(&d->arena); \ + common_has = UPB_MIN(arena_has, \ + upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) goto longstr; \ + fastdecode_docopy(d, ptr - tagbytes - 1, size, 16, buf, tagbytes + 1, \ + dst); \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, 0, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, 0, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, 0, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + if (card == CARD_r) { \ + fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ + } \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + dst->data = ptr; \ + dst->size = size; \ + ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ + dst->size); \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +/* Generate all combinations: + * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ + +#define s_VALIDATE true +#define b_VALIDATE false + +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, type##_VALIDATE); \ + } \ + const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ } -} -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size) { - size_t pre_len = e->limit - e->ptr; +#define UTF8(card, tagbytes) \ + F(card, tagbytes, s) \ + F(card, tagbytes, b) - if ((e->options & kUpb_EncodeOption_CheckRequired) && - m->UPB_PRIVATE(required_count)) { - uint64_t msg_head; - memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (UPB_PRIVATE(_upb_MiniTable_RequiredMask)(m) & ~msg_head) { - encode_err(e, kUpb_EncodeStatus_MissingRequired); - } - } +#define TAGBYTES(card) \ + UTF8(card, 1) \ + UTF8(card, 2) - if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { - size_t unknown_size; - const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) - if (unknown) { - encode_bytes(e, unknown, unknown_size); - } - } +#undef s_VALIDATE +#undef b_VALIDATE +#undef F +#undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING - if (m->UPB_PRIVATE(ext) != kUpb_ExtMode_NonExtendable) { - /* Encode all extensions together. Unlike C++, we do not attempt to keep - * these in field number order relative to normal fields or even to each - * other. */ - size_t ext_count; - const upb_Extension* ext = - UPB_PRIVATE(_upb_Message_Getexts)(msg, &ext_count); - if (ext_count) { - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); - while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { - encode_ext(e, ext, m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - const upb_Extension* end = ext + ext_count; - for (; ext != end; ext++) { - encode_ext(e, ext, m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); - } - } - } - } +/* message fields *************************************************************/ - if (m->UPB_PRIVATE(field_count)) { - const upb_MiniTableField* f = - &m->UPB_PRIVATE(fields)[m->UPB_PRIVATE(field_count)]; - const upb_MiniTableField* first = &m->UPB_PRIVATE(fields)[0]; - while (f != first) { - f--; - if (encode_shouldencode(e, msg, m->UPB_PRIVATE(subs), f)) { - encode_field(e, msg, m->UPB_PRIVATE(subs), f); - } - } +UPB_INLINE +upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* m, + int msg_ceil_bytes) { + size_t size = m->UPB_PRIVATE(size) + sizeof(upb_Message_Internal); + char* msg_data; + if (UPB_LIKELY(msg_ceil_bytes > 0 && + UPB_PRIVATE(_upb_ArenaHas)(&d->arena) >= msg_ceil_bytes)) { + UPB_ASSERT(size <= (size_t)msg_ceil_bytes); + msg_data = d->arena.UPB_PRIVATE(ptr); + d->arena.UPB_PRIVATE(ptr) += size; + UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); + memset(msg_data, 0, msg_ceil_bytes); + UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); + } else { + msg_data = (char*)upb_Arena_Malloc(&d->arena, size); + memset(msg_data, 0, size); } + return msg_data + sizeof(upb_Message_Internal); +} - *size = (e->limit - e->ptr) - pre_len; +typedef struct { + intptr_t table; + upb_Message* msg; +} fastdecode_submsgdata; + +UPB_FORCEINLINE +static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_submsgdata* submsg = ctx; + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); + UPB_ASSUME(ptr != NULL); + return ptr; } -static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, - const upb_Message* const msg, - const upb_MiniTable* const l, - char** const buf, - size_t* const size) { - // Unfortunately we must continue to perform hackery here because there are - // code paths which blindly copy the returned pointer without bothering to - // check for errors until much later (b/235839510). So we still set *buf to - // NULL on error and we still set it to non-NULL on a successful empty result. - if (UPB_SETJMP(encoder->err) == 0) { - encode_message(encoder, msg, l, size); - *size = encoder->limit - encoder->ptr; - if (*size == 0) { - static char ch; - *buf = &ch; - } else { - UPB_ASSERT(encoder->ptr); - *buf = encoder->ptr; - } - } else { - UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); - *buf = NULL; - *size = 0; +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ + } \ + \ + upb_Message** dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_MiniTable* tablep = decode_totablep(table); \ + const upb_MiniTable* subtablep = upb_MiniTableSub_Message( \ + *UPB_PRIVATE(_upb_MiniTable_GetSubByIndex)(tablep, submsg_idx)); \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if (subtablep->UPB_PRIVATE(table_mask) == (uint8_t)-1) { \ + d->depth++; \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_Message*), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t*)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } - _upb_mapsorter_destroy(&encoder->sorter); - return encoder->status; -} +#define SIZES(card, tagbytes) \ + F(card, tagbytes, 64, 64) \ + F(card, tagbytes, 128, 128) \ + F(card, tagbytes, 192, 192) \ + F(card, tagbytes, 256, 256) \ + F(card, tagbytes, max, -1) -upb_EncodeStatus upb_Encode(const upb_Message* msg, const upb_MiniTable* l, - int options, upb_Arena* arena, char** buf, - size_t* size) { - upb_encstate e; - unsigned depth = (unsigned)options >> 16; +#define TAGBYTES(card) \ + SIZES(card, 1) \ + SIZES(card, 2) - e.status = kUpb_EncodeStatus_Ok; - e.arena = arena; - e.buf = NULL; - e.limit = NULL; - e.ptr = NULL; - e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; - e.options = options; - _upb_mapsorter_init(&e.sorter); +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef TAGBYTES +#undef SIZES +#undef F +#undef FASTDECODE_SUBMSG + +#endif /* UPB_FASTTABLE */ - return upb_Encoder_Encode(&e, msg, l, buf, size); -} +#include +#include // Must be last. -UPB_NOINLINE _upb_WireReader_ReadLongVarintRet -_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { - _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; +UPB_NOINLINE UPB_PRIVATE(_upb_WireReader_LongVarint) + UPB_PRIVATE(_upb_WireReader_ReadLongVarint)(const char* ptr, uint64_t val) { + UPB_PRIVATE(_upb_WireReader_LongVarint) ret = {NULL, 0}; uint64_t byte; int i; for (i = 1; i < 10; i++) { @@ -15773,9 +15776,9 @@ _upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { return ret; } -const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream) { +const char* UPB_PRIVATE(_upb_WireReader_SkipGroup)( + const char* ptr, uint32_t tag, int depth_limit, + upb_EpsCopyInputStream* stream) { if (--depth_limit == 0) return NULL; uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index 87af0ee6e5ae..aaeb1170b9ee 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -1354,8 +1354,9 @@ UPB_INLINE const upb_MiniTable* UPB_PRIVATE(_upb_MiniTableSub_Message)( // Must be last. struct upb_Decoder; +struct upb_Message; typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, + struct upb_Message* msg, intptr_t table, uint64_t hasbits, uint64_t data); typedef struct { uint64_t field_data; @@ -4004,6 +4005,72 @@ UPB_API upb_DecodeStatus upb_Decode(const char* buf, size_t size, #endif /* UPB_WIRE_DECODE_H_ */ +// upb_Encode: parsing from a upb_Message using a upb_MiniTable. + +#ifndef UPB_WIRE_ENCODE_H_ +#define UPB_WIRE_ENCODE_H_ + +#include +#include + + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + /* If set, the results of serializing will be deterministic across all + * instances of this binary. There are no guarantees across different + * binary builds. + * + * If your proto contains maps, the encoder will need to malloc()/free() + * memory during encode. */ + kUpb_EncodeOption_Deterministic = 1, + + // When set, unknown fields are not printed. + kUpb_EncodeOption_SkipUnknown = 2, + + // When set, the encode will fail if any required fields are missing. + kUpb_EncodeOption_CheckRequired = 4, +}; + +typedef enum { + kUpb_EncodeStatus_Ok = 0, + kUpb_EncodeStatus_OutOfMemory = 1, // Arena alloc failed + kUpb_EncodeStatus_MaxDepthExceeded = 2, + + // kUpb_EncodeOption_CheckRequired failed but the parse otherwise succeeded. + kUpb_EncodeStatus_MissingRequired = 3, +} upb_EncodeStatus; + +UPB_INLINE uint32_t upb_EncodeOptions_MaxDepth(uint16_t depth) { + return (uint32_t)depth << 16; +} + +UPB_INLINE uint16_t upb_EncodeOptions_GetMaxDepth(uint32_t options) { + return options >> 16; +} + +// Enforce an upper bound on recursion depth. +UPB_INLINE int upb_Encode_LimitDepth(uint32_t encode_options, uint32_t limit) { + uint32_t max_depth = upb_EncodeOptions_GetMaxDepth(encode_options); + if (max_depth > limit) max_depth = limit; + return upb_EncodeOptions_MaxDepth(max_depth) | (encode_options & 0xffff); +} + +UPB_API upb_EncodeStatus upb_Encode(const upb_Message* msg, + const upb_MiniTable* l, int options, + upb_Arena* arena, char** buf, size_t* size); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_WIRE_ENCODE_H_ */ + // These are the specialized field parser functions for the fast parser. // Generated tables will refer to these by name. // @@ -4038,8 +4105,8 @@ UPB_API upb_DecodeStatus upb_Decode(const char* buf, size_t size, // - '1' for one-byte tags (field numbers 1-15) // - '2' for two-byte tags (field numbers 16-2048) -#ifndef UPB_WIRE_DECODE_FAST_H_ -#define UPB_WIRE_DECODE_FAST_H_ +#ifndef UPB_WIRE_DECODE_INTERNAL_FAST_H_ +#define UPB_WIRE_DECODE_INTERNAL_FAST_H_ // Must be last. @@ -4140,73 +4207,7 @@ TAGBYTES(r) #endif -#endif /* UPB_WIRE_DECODE_FAST_H_ */ - -// upb_Encode: parsing from a upb_Message using a upb_MiniTable. - -#ifndef UPB_WIRE_ENCODE_H_ -#define UPB_WIRE_ENCODE_H_ - -#include -#include - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - /* If set, the results of serializing will be deterministic across all - * instances of this binary. There are no guarantees across different - * binary builds. - * - * If your proto contains maps, the encoder will need to malloc()/free() - * memory during encode. */ - kUpb_EncodeOption_Deterministic = 1, - - // When set, unknown fields are not printed. - kUpb_EncodeOption_SkipUnknown = 2, - - // When set, the encode will fail if any required fields are missing. - kUpb_EncodeOption_CheckRequired = 4, -}; - -typedef enum { - kUpb_EncodeStatus_Ok = 0, - kUpb_EncodeStatus_OutOfMemory = 1, // Arena alloc failed - kUpb_EncodeStatus_MaxDepthExceeded = 2, - - // kUpb_EncodeOption_CheckRequired failed but the parse otherwise succeeded. - kUpb_EncodeStatus_MissingRequired = 3, -} upb_EncodeStatus; - -UPB_INLINE uint32_t upb_EncodeOptions_MaxDepth(uint16_t depth) { - return (uint32_t)depth << 16; -} - -UPB_INLINE uint16_t upb_EncodeOptions_GetMaxDepth(uint32_t options) { - return options >> 16; -} - -// Enforce an upper bound on recursion depth. -UPB_INLINE int upb_Encode_LimitDepth(uint32_t encode_options, uint32_t limit) { - uint32_t max_depth = upb_EncodeOptions_GetMaxDepth(encode_options); - if (max_depth > limit) max_depth = limit; - return upb_EncodeOptions_MaxDepth(max_depth) | (encode_options & 0xffff); -} - -UPB_API upb_EncodeStatus upb_Encode(const upb_Message* msg, - const upb_MiniTable* l, int options, - upb_Arena* arena, char** buf, size_t* size); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_WIRE_ENCODE_H_ */ +#endif /* UPB_WIRE_DECODE_INTERNAL_FAST_H_ */ // IWYU pragma: end_exports #endif // UPB_GENERATED_CODE_SUPPORT_H_ @@ -13714,8 +13715,8 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { #endif /* UPB_WIRE_INTERNAL_DECODE_H_ */ -#ifndef UPB_WIRE_INTERNAL_SWAP_H_ -#define UPB_WIRE_INTERNAL_SWAP_H_ +#ifndef UPB_WIRE_INTERNAL_ENDIAN_H_ +#define UPB_WIRE_INTERNAL_ENDIAN_H_ #include @@ -13725,23 +13726,23 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { extern "C" { #endif -UPB_INLINE bool _upb_IsLittleEndian(void) { - int x = 1; +UPB_INLINE bool UPB_PRIVATE(_upb_IsLittleEndian)(void) { + const int x = 1; return *(char*)&x == 1; } -UPB_INLINE uint32_t _upb_BigEndian_Swap32(uint32_t val) { - if (_upb_IsLittleEndian()) return val; +UPB_INLINE uint32_t UPB_PRIVATE(_upb_BigEndian32)(uint32_t val) { + if (UPB_PRIVATE(_upb_IsLittleEndian)()) return val; return ((val & 0xff) << 24) | ((val & 0xff00) << 8) | ((val & 0xff0000) >> 8) | ((val & 0xff000000) >> 24); } -UPB_INLINE uint64_t _upb_BigEndian_Swap64(uint64_t val) { - if (_upb_IsLittleEndian()) return val; +UPB_INLINE uint64_t UPB_PRIVATE(_upb_BigEndian64)(uint64_t val) { + if (UPB_PRIVATE(_upb_IsLittleEndian)()) return val; - return ((uint64_t)_upb_BigEndian_Swap32((uint32_t)val) << 32) | - _upb_BigEndian_Swap32((uint32_t)(val >> 32)); + return ((uint64_t)UPB_PRIVATE(_upb_BigEndian32)((uint32_t)val) << 32) | + UPB_PRIVATE(_upb_BigEndian32)((uint32_t)(val >> 32)); } #ifdef __cplusplus @@ -13749,62 +13750,42 @@ UPB_INLINE uint64_t _upb_BigEndian_Swap64(uint64_t val) { #endif -#endif /* UPB_WIRE_INTERNAL_SWAP_H_ */ +#endif /* UPB_WIRE_INTERNAL_ENDIAN_H_ */ #ifndef UPB_WIRE_READER_H_ #define UPB_WIRE_READER_H_ -#ifndef UPB_WIRE_TYPES_H_ -#define UPB_WIRE_TYPES_H_ - -// A list of types as they are encoded on the wire. -typedef enum { - kUpb_WireType_Varint = 0, - kUpb_WireType_64Bit = 1, - kUpb_WireType_Delimited = 2, - kUpb_WireType_StartGroup = 3, - kUpb_WireType_EndGroup = 4, - kUpb_WireType_32Bit = 5 -} upb_WireType; - -#endif /* UPB_WIRE_TYPES_H_ */ +#ifndef UPB_WIRE_INTERNAL_READER_H_ +#define UPB_WIRE_INTERNAL_READER_H_ // Must be last. -#ifdef __cplusplus -extern "C" { -#endif - -// The upb_WireReader interface is suitable for general-purpose parsing of -// protobuf binary wire format. It is designed to be used along with -// upb_EpsCopyInputStream for buffering, and all parsing routines in this file -// assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is -// available to read without any bounds checks. - -#define kUpb_WireReader_WireTypeMask 7 #define kUpb_WireReader_WireTypeBits 3 +#define kUpb_WireReader_WireTypeMask 7 typedef struct { const char* ptr; uint64_t val; -} _upb_WireReader_ReadLongVarintRet; +} UPB_PRIVATE(_upb_WireReader_LongVarint); -_upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint( - const char* ptr, uint64_t val); +#ifdef __cplusplus +extern "C" { +#endif + +UPB_PRIVATE(_upb_WireReader_LongVarint) +UPB_PRIVATE(_upb_WireReader_ReadLongVarint)(const char* ptr, uint64_t val); -static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr, - uint64_t* val, - int maxlen, - uint64_t maxval) { +static UPB_FORCEINLINE const char* UPB_PRIVATE(_upb_WireReader_ReadVarint)( + const char* ptr, uint64_t* val, int maxlen, uint64_t maxval) { uint64_t byte = (uint8_t)*ptr; if (UPB_LIKELY((byte & 0x80) == 0)) { *val = (uint32_t)byte; return ptr + 1; } const char* start = ptr; - _upb_WireReader_ReadLongVarintRet res = - _upb_WireReader_ReadLongVarint(ptr, byte); + UPB_PRIVATE(_upb_WireReader_LongVarint) + res = UPB_PRIVATE(_upb_WireReader_ReadLongVarint)(ptr, byte); if (!res.ptr || (maxlen < 10 && res.ptr - start > maxlen) || res.val > maxval) { return NULL; // Malformed. @@ -13813,6 +13794,48 @@ static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr, return res.ptr; } +UPB_INLINE uint32_t UPB_PRIVATE(_upb_WireReader_GetFieldNumber)(uint32_t tag) { + return tag >> kUpb_WireReader_WireTypeBits; +} + +UPB_INLINE uint8_t UPB_PRIVATE(_upb_WireReader_GetWireType)(uint32_t tag) { + return tag & kUpb_WireReader_WireTypeMask; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif // UPB_WIRE_INTERNAL_READER_H_ + +#ifndef UPB_WIRE_TYPES_H_ +#define UPB_WIRE_TYPES_H_ + +// A list of types as they are encoded on the wire. +typedef enum { + kUpb_WireType_Varint = 0, + kUpb_WireType_64Bit = 1, + kUpb_WireType_Delimited = 2, + kUpb_WireType_StartGroup = 3, + kUpb_WireType_EndGroup = 4, + kUpb_WireType_32Bit = 5 +} upb_WireType; + +#endif /* UPB_WIRE_TYPES_H_ */ + +// Must be last. + +// The upb_WireReader interface is suitable for general-purpose parsing of +// protobuf binary wire format. It is designed to be used along with +// upb_EpsCopyInputStream for buffering, and all parsing routines in this file +// assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is +// available to read without any bounds checks. + +#ifdef __cplusplus +extern "C" { +#endif + // Parses a tag into `tag`, and returns a pointer past the end of the tag, or // NULL if there was an error in the tag data. // @@ -13822,7 +13845,7 @@ static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr, static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr, uint32_t* tag) { uint64_t val; - ptr = _upb_WireReader_ReadVarint(ptr, &val, 5, UINT32_MAX); + ptr = UPB_PRIVATE(_upb_WireReader_ReadVarint)(ptr, &val, 5, UINT32_MAX); if (!ptr) return NULL; *tag = val; return ptr; @@ -13830,17 +13853,17 @@ static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr, // Given a tag, returns the field number. UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) { - return tag >> kUpb_WireReader_WireTypeBits; + return UPB_PRIVATE(_upb_WireReader_GetFieldNumber)(tag); } // Given a tag, returns the wire type. UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) { - return tag & kUpb_WireReader_WireTypeMask; + return UPB_PRIVATE(_upb_WireReader_GetWireType)(tag); } UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr, uint64_t* val) { - return _upb_WireReader_ReadVarint(ptr, val, 10, UINT64_MAX); + return UPB_PRIVATE(_upb_WireReader_ReadVarint)(ptr, val, 10, UINT64_MAX); } // Skips data for a varint, returning a pointer past the end of the varint, or @@ -13876,7 +13899,7 @@ UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) { UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) { uint32_t uval; memcpy(&uval, ptr, 4); - uval = _upb_BigEndian_Swap32(uval); + uval = UPB_PRIVATE(_upb_BigEndian32)(uval); memcpy(val, &uval, 4); return ptr + 4; } @@ -13889,14 +13912,14 @@ UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) { UPB_INLINE const char* upb_WireReader_ReadFixed64(const char* ptr, void* val) { uint64_t uval; memcpy(&uval, ptr, 8); - uval = _upb_BigEndian_Swap64(uval); + uval = UPB_PRIVATE(_upb_BigEndian64)(uval); memcpy(val, &uval, 8); return ptr + 8; } -const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream); +const char* UPB_PRIVATE(_upb_WireReader_SkipGroup)( + const char* ptr, uint32_t tag, int depth_limit, + upb_EpsCopyInputStream* stream); // Skips data for a group, returning a pointer past the end of the group, or // NULL if there was an error parsing the group. The `tag` argument should be @@ -13909,7 +13932,7 @@ const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, // control over this? UPB_INLINE const char* upb_WireReader_SkipGroup( const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) { - return _upb_WireReader_SkipGroup(ptr, tag, 100, stream); + return UPB_PRIVATE(_upb_WireReader_SkipGroup)(ptr, tag, 100, stream); } UPB_INLINE const char* _upb_WireReader_SkipValue( @@ -13930,7 +13953,8 @@ UPB_INLINE const char* _upb_WireReader_SkipValue( return ptr; } case kUpb_WireType_StartGroup: - return _upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream); + return UPB_PRIVATE(_upb_WireReader_SkipGroup)(ptr, tag, depth_limit, + stream); case kUpb_WireType_EndGroup: return NULL; // Should be handled before now. default: diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index bcfca4d07e32..27aa17ec2c23 100644 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -12487,7 +12487,7 @@ static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr, } static void _upb_Decoder_MungeInt32(wireval* val) { - if (!_upb_IsLittleEndian()) { + if (!UPB_PRIVATE(_upb_IsLittleEndian)()) { /* The next stage will memcpy(dst, &val, 4) */ val->uint32_val = val->uint64_val; } @@ -12707,7 +12707,7 @@ static const char* _upb_Decoder_DecodeFixedPacked( arr->UPB_PRIVATE(size) += count; // Note: if/when the decoder supports multi-buffer input, we will need to // handle buffer seams here. - if (_upb_IsLittleEndian()) { + if (UPB_PRIVATE(_upb_IsLittleEndian)()) { ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); } else { int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); @@ -13031,7 +13031,7 @@ const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr, } uint64_t msg_head; memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); + msg_head = UPB_PRIVATE(_upb_BigEndian64)(msg_head); if (UPB_PRIVATE(_upb_MiniTable_RequiredMask)(m) & ~msg_head) { d->missing_required = true; } @@ -13660,1621 +13660,1624 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, upb_Message* msg, #undef OP_FIXPCK_LG2 #undef OP_VARPCK_LG2 -// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. -// Also the table size grows by 2x. -// -// Could potentially be ported to other 64-bit archs that pass at least six -// arguments in registers and have 8 unused high bits in pointers. -// -// The overall design is to create specialized functions for every possible -// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch -// to the specialized function as quickly as possible. +// We encode backwards, to avoid pre-computing lengths (one-pass encode). +#include +#include +#include +#include + // Must be last. -#if UPB_FASTTABLE +#define UPB_PB_VARINT_MAX_LEN 10 -// The standard set of arguments passed to each parsing function. -// Thanks to x86-64 calling conventions, these will stay in registers. -#define UPB_PARSE_PARAMS \ - upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ - uint64_t hasbits, uint64_t data +UPB_NOINLINE +static size_t encode_varint64(uint64_t val, char* buf) { + size_t i = 0; + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } while (val); + return i; +} -#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data +static uint32_t encode_zz32(int32_t n) { + return ((uint32_t)n << 1) ^ (n >> 31); +} +static uint64_t encode_zz64(int64_t n) { + return ((uint64_t)n << 1) ^ (n >> 63); +} -#define RETURN_GENERIC(m) \ - /* Uncomment either of these for debugging purposes. */ \ - /* fprintf(stderr, m); */ \ - /*__builtin_trap(); */ \ - return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); +typedef struct { + upb_EncodeStatus status; + jmp_buf err; + upb_Arena* arena; + char *buf, *ptr, *limit; + int options; + int depth; + _upb_mapsorter sorter; +} upb_encstate; -typedef enum { - CARD_s = 0, /* Singular (optional, non-repeated) */ - CARD_o = 1, /* Oneof */ - CARD_r = 2, /* Repeated */ - CARD_p = 3 /* Packed Repeated */ -} upb_card; +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; +} -UPB_NOINLINE -static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { - int overrun = data; - ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( - &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); +UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { + UPB_ASSERT(s != kUpb_EncodeStatus_Ok); + e->status = s; + UPB_LONGJMP(e->err, 1); } -UPB_FORCEINLINE -static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { - int overrun; - switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { - case kUpb_IsDoneStatus_Done: - *(uint32_t*)msg |= hasbits; // Sync hasbits. - const upb_MiniTable* m = decode_totablep(table); - return UPB_UNLIKELY(m->UPB_PRIVATE(required_count)) - ? _upb_Decoder_CheckRequired(d, ptr, msg, m) - : ptr; - case kUpb_IsDoneStatus_NotDone: - break; - case kUpb_IsDoneStatus_NeedFallback: - data = overrun; - UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); - } +UPB_NOINLINE +static void encode_growbuffer(upb_encstate* e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); - // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - data = _upb_FastDecoder_LoadTag(ptr); - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); -} + if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); -UPB_FORCEINLINE -static bool fastdecode_checktag(uint16_t data, int tagbytes) { - if (tagbytes == 1) { - return (data & 0xff) == 0; - } else { - return data == 0; + // We want previous data at the end, realloc() put it at the beginning. + // TODO: This is somewhat inefficient since we are copying twice. + // Maybe create a realloc() that copies to the end of the new buffer? + if (old_size > 0) { + memmove(new_buf + new_size - old_size, e->buf, old_size); } -} -UPB_FORCEINLINE -static const char* fastdecode_longsize(const char* ptr, int* size) { - int i; - UPB_ASSERT(*size & 0x80); - *size &= 0xff; - for (i = 0; i < 3; i++) { - ptr++; - size_t byte = (uint8_t)ptr[-1]; - *size += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; - } - ptr++; - size_t byte = (uint8_t)ptr[-1]; - // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected - // for a 32 bit varint. - if (UPB_UNLIKELY(byte >= 8)) return NULL; - *size += (byte - 1) << 28; - return ptr; + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; + + e->ptr -= bytes; } +/* Call to ensure that at least "bytes" bytes are available for writing at + * e->ptr. Returns false if the bytes could not be allocated. */ UPB_FORCEINLINE -static const char* fastdecode_delimited( - upb_Decoder* d, const char* ptr, - upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { - ptr++; - - // Sign-extend so varint greater than one byte becomes negative, causing - // fast delimited parse to fail. - int len = (int8_t)ptr[-1]; - - if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, - ctx)) { - // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. - // If it exceeds the buffer limit, limit/limit_ptr will change during - // sub-message parsing, so we need to preserve delta, not limit. - if (UPB_UNLIKELY(len & 0x80)) { - // Size varint >1 byte (length >= 128). - ptr = fastdecode_longsize(ptr, &len); - if (!ptr) { - // Corrupt wire format: size exceeded INT_MAX. - return NULL; - } - } - if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { - // Corrupt wire format: invalid limit. - return NULL; - } - int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); - ptr = func(&d->input, ptr, ctx); - upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); +static void encode_reserve(upb_encstate* e, size_t bytes) { + if ((size_t)(e->ptr - e->buf) < bytes) { + encode_growbuffer(e, bytes); + return; } - return ptr; + + e->ptr -= bytes; } -/* singular, oneof, repeated field handling ***********************************/ +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static void encode_bytes(upb_encstate* e, const void* data, size_t len) { + if (len == 0) return; /* memcpy() with zero size is UB */ + encode_reserve(e, len); + memcpy(e->ptr, data, len); +} -typedef struct { - upb_Array* arr; - void* end; -} fastdecode_arr; +static void encode_fixed64(upb_encstate* e, uint64_t val) { + val = UPB_PRIVATE(_upb_BigEndian64)(val); + encode_bytes(e, &val, sizeof(uint64_t)); +} -typedef enum { - FD_NEXT_ATLIMIT, - FD_NEXT_SAMEFIELD, - FD_NEXT_OTHERFIELD -} fastdecode_next; +static void encode_fixed32(upb_encstate* e, uint32_t val) { + val = UPB_PRIVATE(_upb_BigEndian32)(val); + encode_bytes(e, &val, sizeof(uint32_t)); +} -typedef struct { - void* dst; - fastdecode_next next; - uint32_t tag; -} fastdecode_nextret; +UPB_NOINLINE +static void encode_longvarint(upb_encstate* e, uint64_t val) { + size_t len; + char* start; -UPB_FORCEINLINE -static void* fastdecode_resizearr(upb_Decoder* d, void* dst, - fastdecode_arr* farr, int valbytes) { - if (UPB_UNLIKELY(dst == farr->end)) { - size_t old_capacity = farr->arr->UPB_PRIVATE(capacity); - size_t old_bytes = old_capacity * valbytes; - size_t new_capacity = old_capacity * 2; - size_t new_bytes = new_capacity * valbytes; - char* old_ptr = _upb_array_ptr(farr->arr); - char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - UPB_PRIVATE(_upb_Array_SetTaggedPtr)(farr->arr, new_ptr, elem_size_lg2); - farr->arr->UPB_PRIVATE(capacity) = new_capacity; - dst = (void*)(new_ptr + (old_capacity * valbytes)); - farr->end = (void*)(new_ptr + (new_capacity * valbytes)); - } - return dst; + encode_reserve(e, UPB_PB_VARINT_MAX_LEN); + len = encode_varint64(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; } UPB_FORCEINLINE -static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { - if (tagbytes == 1) { - return (uint8_t)tag == (uint8_t)data; +static void encode_varint(upb_encstate* e, uint64_t val) { + if (val < 128 && e->ptr != e->buf) { + --e->ptr; + *e->ptr = val; } else { - return (uint16_t)tag == (uint16_t)data; + encode_longvarint(e, val); } } -UPB_FORCEINLINE -static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, - int valbytes) { - farr->arr->UPB_PRIVATE(size) = - (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; +static void encode_double(upb_encstate* e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + encode_fixed64(e, u64); } -UPB_FORCEINLINE -static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, - const char** ptr, - fastdecode_arr* farr, - uint64_t data, int tagbytes, - int valbytes) { - fastdecode_nextret ret; - dst = (char*)dst + valbytes; +static void encode_float(upb_encstate* e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + encode_fixed32(e, u32); +} - if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { - ret.tag = _upb_FastDecoder_LoadTag(*ptr); - if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { - ret.next = FD_NEXT_SAMEFIELD; - } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_OTHERFIELD; +static void encode_tag(upb_encstate* e, uint32_t field_number, + uint8_t wire_type) { + encode_varint(e, (field_number << 3) | wire_type); +} + +static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, + size_t elem_size, uint32_t tag) { + size_t bytes = arr->UPB_PRIVATE(size) * elem_size; + const char* data = _upb_array_constptr(arr); + const char* ptr = data + bytes - elem_size; + + if (tag || !UPB_PRIVATE(_upb_IsLittleEndian)()) { + while (true) { + if (elem_size == 4) { + uint32_t val; + memcpy(&val, ptr, sizeof(val)); + val = UPB_PRIVATE(_upb_BigEndian32)(val); + encode_bytes(e, &val, elem_size); + } else { + UPB_ASSERT(elem_size == 8); + uint64_t val; + memcpy(&val, ptr, sizeof(val)); + val = UPB_PRIVATE(_upb_BigEndian64)(val); + encode_bytes(e, &val, elem_size); + } + + if (tag) encode_varint(e, tag); + if (ptr == data) break; + ptr -= elem_size; } } else { - fastdecode_commitarr(dst, farr, valbytes); - ret.next = FD_NEXT_ATLIMIT; + encode_bytes(e, data, bytes); } - - ret.dst = dst; - return ret; } -UPB_FORCEINLINE -static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { - size_t ofs = data >> 48; - return (char*)msg + ofs; +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size); + +static void encode_TaggedMessagePtr(upb_encstate* e, + upb_TaggedMessagePtr tagged, + const upb_MiniTable* m, size_t* size) { + if (upb_TaggedMessagePtr_IsEmpty(tagged)) { + m = UPB_PRIVATE(_upb_MiniTable_Empty)(); + } + encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); } -UPB_FORCEINLINE -static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, - upb_Message* msg, uint64_t* data, - uint64_t* hasbits, fastdecode_arr* farr, - int valbytes, upb_card card) { - switch (card) { - case CARD_s: { - uint8_t hasbit_index = *data >> 24; - // Set hasbit and return pointer to scalar field. - *hasbits |= 1ull << hasbit_index; - return fastdecode_fieldmem(msg, *data); +static void encode_scalar(upb_encstate* e, const void* _field_mem, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const char* field_mem = _field_mem; + int wire_type; + +#define CASE(ctype, type, wtype, encodeval) \ + { \ + ctype val = *(ctype*)field_mem; \ + encode_##type(e, encodeval); \ + wire_type = wtype; \ + break; \ + } + + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + CASE(double, double, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Float: + CASE(float, float, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + CASE(uint64_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_UInt32: + CASE(uint32_t, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); + case kUpb_FieldType_Bool: + CASE(bool, varint, kUpb_WireType_Varint, val); + case kUpb_FieldType_SInt32: + CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); + case kUpb_FieldType_SInt64: + CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + upb_StringView view = *(upb_StringView*)field_mem; + encode_bytes(e, view.data, view.size); + encode_varint(e, view.size); + wire_type = kUpb_WireType_Delimited; + break; } - case CARD_o: { - uint16_t case_ofs = *data >> 32; - uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); - uint8_t field_number = *data >> 24; - *oneof_case = field_number; - return fastdecode_fieldmem(msg, *data); + case kUpb_FieldType_Group: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + if (submsg == 0) { + return; + } + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, submsg, subm, &size); + wire_type = kUpb_WireType_StartGroup; + e->depth++; + break; } - case CARD_r: { - // Get pointer to upb_Array and allocate/expand if necessary. - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - upb_Array** arr_p = fastdecode_fieldmem(msg, *data); - char* begin; - *(uint32_t*)msg |= *hasbits; - *hasbits = 0; - if (UPB_LIKELY(!*arr_p)) { - farr->arr = UPB_PRIVATE(_upb_Array_New)(&d->arena, 8, elem_size_lg2); - *arr_p = farr->arr; - } else { - farr->arr = *arr_p; + case kUpb_FieldType_Message: { + size_t size; + upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; + const upb_MiniTable* subm = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + if (submsg == 0) { + return; } - begin = _upb_array_ptr(farr->arr); - farr->end = begin + (farr->arr->UPB_PRIVATE(capacity) * valbytes); - *data = _upb_FastDecoder_LoadTag(ptr); - return begin + (farr->arr->UPB_PRIVATE(size) * valbytes); + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + encode_TaggedMessagePtr(e, submsg, subm, &size); + encode_varint(e, size); + wire_type = kUpb_WireType_Delimited; + e->depth++; + break; } default: UPB_UNREACHABLE(); } -} +#undef CASE -UPB_FORCEINLINE -static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { - *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. - return fastdecode_checktag(*data, tagbytes); + encode_tag(e, f->UPB_PRIVATE(number), wire_type); } -#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ - UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ - } \ - RETURN_GENERIC("packed check tag mismatch\n"); \ - } - -/* varint fields **************************************************************/ +static void encode_array(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Array* arr = *UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), upb_Array*); + bool packed = upb_MiniTableField_IsPacked(f); + size_t pre_len = e->limit - e->ptr; -UPB_FORCEINLINE -static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { - if (valbytes == 1) { - return val != 0; - } else if (zigzag) { - if (valbytes == 4) { - uint32_t n = val; - return (n >> 1) ^ -(int32_t)(n & 1); - } else if (valbytes == 8) { - return (val >> 1) ^ -(int64_t)(val & 1); - } - UPB_UNREACHABLE(); + if (arr == NULL || arr->UPB_PRIVATE(size) == 0) { + return; } - return val; -} -UPB_FORCEINLINE -static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { - ptr++; - *val = (uint8_t)ptr[-1]; - if (UPB_UNLIKELY(*val & 0x80)) { - int i; - for (i = 0; i < 8; i++) { - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - *val += (byte - 1) << (7 + 7 * i); - if (UPB_LIKELY((byte & 0x80) == 0)) goto done; - } - ptr++; - uint64_t byte = (uint8_t)ptr[-1]; - if (byte > 1) { - return NULL; - } - *val += (byte - 1) << 63; - } -done: - UPB_ASSUME(ptr != NULL); - return ptr; -} +#define VARINT_CASE(ctype, encode) \ + { \ + const ctype* start = _upb_array_constptr(arr); \ + const ctype* ptr = start + arr->UPB_PRIVATE(size); \ + uint32_t tag = \ + packed ? 0 : (f->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint; \ + do { \ + ptr--; \ + encode_varint(e, encode); \ + if (tag) encode_varint(e, tag); \ + } while (ptr != start); \ + } \ + break; -#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed) \ - uint64_t val; \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_varint64(ptr, &val); \ - if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - val = fastdecode_munge(val, valbytes, zigzag); \ - memcpy(dst, &val, valbytes); \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); - -typedef struct { - uint8_t valbytes; - bool zigzag; - void* dst; - fastdecode_arr farr; -} fastdecode_varintdata; - -UPB_FORCEINLINE -static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_varintdata* data = ctx; - void* dst = data->dst; - uint64_t val; +#define TAG(wire_type) (packed ? 0 : (f->UPB_PRIVATE(number) << 3 | wire_type)) - while (!_upb_Decoder_IsDone(d, &ptr)) { - dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return NULL; - val = fastdecode_munge(val, data->valbytes, data->zigzag); - memcpy(dst, &val, data->valbytes); - dst = (char*)dst + data->valbytes; + switch (f->UPB_PRIVATE(descriptortype)) { + case kUpb_FieldType_Double: + encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Float: + encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_SFixed64: + case kUpb_FieldType_Fixed64: + encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); + break; + case kUpb_FieldType_Fixed32: + case kUpb_FieldType_SFixed32: + encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); + break; + case kUpb_FieldType_Int64: + case kUpb_FieldType_UInt64: + VARINT_CASE(uint64_t, *ptr); + case kUpb_FieldType_UInt32: + VARINT_CASE(uint32_t, *ptr); + case kUpb_FieldType_Int32: + case kUpb_FieldType_Enum: + VARINT_CASE(int32_t, (int64_t)*ptr); + case kUpb_FieldType_Bool: + VARINT_CASE(bool, *ptr); + case kUpb_FieldType_SInt32: + VARINT_CASE(int32_t, encode_zz32(*ptr)); + case kUpb_FieldType_SInt64: + VARINT_CASE(int64_t, encode_zz64(*ptr)); + case kUpb_FieldType_String: + case kUpb_FieldType_Bytes: { + const upb_StringView* start = _upb_array_constptr(arr); + const upb_StringView* ptr = start + arr->UPB_PRIVATE(size); + do { + ptr--; + encode_bytes(e, ptr->data, ptr->size); + encode_varint(e, ptr->size); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); + } while (ptr != start); + return; + } + case kUpb_FieldType_Group: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->UPB_PRIVATE(size); + const upb_MiniTable* subm = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_EndGroup); + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_StartGroup); + } while (ptr != start); + e->depth++; + return; + } + case kUpb_FieldType_Message: { + const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); + const upb_TaggedMessagePtr* ptr = start + arr->UPB_PRIVATE(size); + const upb_MiniTable* subm = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); + do { + size_t size; + ptr--; + encode_TaggedMessagePtr(e, *ptr, subm, &size); + encode_varint(e, size); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); + } while (ptr != start); + e->depth++; + return; + } } +#undef VARINT_CASE - fastdecode_commitarr(dst, &data->farr, data->valbytes); - return ptr; + if (packed) { + encode_varint(e, e->limit - e->ptr - pre_len); + encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); + } } -#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked) \ - fastdecode_varintdata ctx = {valbytes, zigzag}; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ - \ - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ - valbytes, CARD_r); \ - if (UPB_UNLIKELY(!ctx.dst)) { \ - RETURN_GENERIC("need array resize\n"); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ - \ - if (UPB_UNLIKELY(ptr == NULL)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); +static void encode_mapentry(upb_encstate* e, uint32_t number, + const upb_MiniTable* layout, + const upb_MapEntry* ent) { + const upb_MiniTableField* key_field = &layout->UPB_PRIVATE(fields)[0]; + const upb_MiniTableField* val_field = &layout->UPB_PRIVATE(fields)[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->data.v, layout->UPB_PRIVATE(subs), val_field); + encode_scalar(e, &ent->data.k, layout->UPB_PRIVATE(subs), key_field); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, kUpb_WireType_Delimited); +} -#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, zigzag, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, zigzag, packed); \ - } +static void encode_map(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + const upb_Map* map = *UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), const upb_Map*); + const upb_MiniTable* layout = + upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); + UPB_ASSERT(layout->UPB_PRIVATE(field_count) == 2); -#define z_ZZ true -#define b_ZZ false -#define v_ZZ false + if (map == NULL) return; -/* Generate all combinations: - * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap( + &e->sorter, layout->UPB_PRIVATE(fields)[0].UPB_PRIVATE(descriptortype), + map, &sorted); + upb_MapEntry ent; + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->UPB_PRIVATE(number), layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + intptr_t iter = UPB_STRTABLE_BEGIN; + upb_StringView key; + upb_value val; + while (upb_strtable_next2(&map->table, &key, &val, &iter)) { + upb_MapEntry ent; + _upb_map_fromkey(key, &ent.data.k, map->key_size); + _upb_map_fromvalue(val, &ent.data.v, map->val_size); + encode_mapentry(e, f->UPB_PRIVATE(number), layout, &ent); + } + } +} -#define F(card, type, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, type##_ZZ, \ - upb_pr##type##valbytes##_##tagbytes##bt, \ - upb_pp##type##valbytes##_##tagbytes##bt); \ +static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* f) { + if (f->presence == 0) { + // Proto3 presence or map/array. + const void* mem = UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), void); + switch (UPB_PRIVATE(_upb_MiniTableField_GetRep)(f)) { + case kUpb_FieldRep_1Byte: { + char ch; + memcpy(&ch, mem, 1); + return ch != 0; + } + case kUpb_FieldRep_4Byte: { + uint32_t u32; + memcpy(&u32, mem, 4); + return u32 != 0; + } + case kUpb_FieldRep_8Byte: { + uint64_t u64; + memcpy(&u64, mem, 8); + return u64 != 0; + } + case kUpb_FieldRep_StringView: { + const upb_StringView* str = (const upb_StringView*)mem; + return str->size != 0; + } + default: + UPB_UNREACHABLE(); + } + } else if (f->presence > 0) { + // Proto2 presence: hasbit. + return UPB_PRIVATE(_upb_Message_GetHasbit)(msg, f); + } else { + // Field is in a oneof. + return UPB_PRIVATE(_upb_Message_GetOneofCase)(msg, f) == + f->UPB_PRIVATE(number); } +} -#define TYPES(card, tagbytes) \ - F(card, b, 1, tagbytes) \ - F(card, v, 4, tagbytes) \ - F(card, v, 8, tagbytes) \ - F(card, z, 4, tagbytes) \ - F(card, z, 8, tagbytes) - -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) +static void encode_field(upb_encstate* e, const upb_Message* msg, + const upb_MiniTableSub* subs, + const upb_MiniTableField* field) { + switch (UPB_PRIVATE(_upb_MiniTableField_Mode)(field)) { + case kUpb_FieldMode_Array: + encode_array(e, msg, subs, field); + break; + case kUpb_FieldMode_Map: + encode_map(e, msg, subs, field); + break; + case kUpb_FieldMode_Scalar: + encode_scalar(e, UPB_PTR_AT(msg, field->UPB_PRIVATE(offset), void), subs, + field); + break; + default: + UPB_UNREACHABLE(); + } +} -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) +static void encode_msgset_item(upb_encstate* e, const upb_Extension* ext) { + size_t size; + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); + encode_message(e, ext->data.ptr, + upb_MiniTableExtension_GetSubMessage(ext->ext), &size); + encode_varint(e, size); + encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); + encode_varint(e, upb_MiniTableExtension_Number(ext->ext)); + encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); + encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); +} -#undef z_ZZ -#undef b_ZZ -#undef v_ZZ -#undef o_ONEOF -#undef s_ONEOF -#undef r_ONEOF -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDVARINT -#undef FASTDECODE_PACKEDVARINT -#undef FASTDECODE_VARINT +static void encode_ext(upb_encstate* e, const upb_Extension* ext, + bool is_message_set) { + if (UPB_UNLIKELY(is_message_set)) { + encode_msgset_item(e, ext); + } else { + encode_field(e, (upb_Message*)&ext->data, &ext->ext->UPB_PRIVATE(sub), + &ext->ext->UPB_PRIVATE(field)); + } +} -/* fixed fields ***************************************************************/ +static void encode_message(upb_encstate* e, const upb_Message* msg, + const upb_MiniTable* m, size_t* size) { + size_t pre_len = e->limit - e->ptr; -#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed) \ - void* dst; \ - fastdecode_arr farr; \ - \ - FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ - card); \ - if (card == CARD_r) { \ - if (UPB_UNLIKELY(!dst)) { \ - RETURN_GENERIC("couldn't allocate array in arena\n"); \ - } \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ - } \ - \ - ptr += tagbytes; \ - memcpy(dst, ptr, valbytes); \ - ptr += valbytes; \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, valbytes); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + if ((e->options & kUpb_EncodeOption_CheckRequired) && + m->UPB_PRIVATE(required_count)) { + uint64_t msg_head; + memcpy(&msg_head, msg, 8); + msg_head = UPB_PRIVATE(_upb_BigEndian64)(msg_head); + if (UPB_PRIVATE(_upb_MiniTable_RequiredMask)(m) & ~msg_head) { + encode_err(e, kUpb_EncodeStatus_MissingRequired); + } + } -#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked) \ - FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ - \ - ptr += tagbytes; \ - int size = (uint8_t)ptr[0]; \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size) || \ - (size % valbytes) != 0)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ - upb_Array* arr = *arr_p; \ - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ - int elems = size / valbytes; \ - \ - if (UPB_LIKELY(!arr)) { \ - *arr_p = arr = \ - UPB_PRIVATE(_upb_Array_New)(&d->arena, elems, elem_size_lg2); \ - if (!arr) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - } else { \ - _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ - } \ - \ - char* dst = _upb_array_ptr(arr); \ - memcpy(dst, ptr, size); \ - arr->UPB_PRIVATE(size) = elems; \ - \ - ptr += size; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { + size_t unknown_size; + const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); -#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, unpacked, packed) \ - if (card == CARD_p) { \ - FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, unpacked); \ - } else { \ - FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ - valbytes, card, packed); \ + if (unknown) { + encode_bytes(e, unknown, unknown_size); + } } -/* Generate all combinations: - * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ - -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ - CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ - upb_prf##valbytes##_##tagbytes##bt); \ + if (m->UPB_PRIVATE(ext) != kUpb_ExtMode_NonExtendable) { + /* Encode all extensions together. Unlike C++, we do not attempt to keep + * these in field number order relative to normal fields or even to each + * other. */ + size_t ext_count; + const upb_Extension* ext = + UPB_PRIVATE(_upb_Message_Getexts)(msg, &ext_count); + if (ext_count) { + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext, m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + const upb_Extension* end = ext + ext_count; + for (; ext != end; ext++) { + encode_ext(e, ext, m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); + } + } + } } -#define TYPES(card, tagbytes) \ - F(card, 4, tagbytes) \ - F(card, 8, tagbytes) + if (m->UPB_PRIVATE(field_count)) { + const upb_MiniTableField* f = + &m->UPB_PRIVATE(fields)[m->UPB_PRIVATE(field_count)]; + const upb_MiniTableField* first = &m->UPB_PRIVATE(fields)[0]; + while (f != first) { + f--; + if (encode_shouldencode(e, msg, m->UPB_PRIVATE(subs), f)) { + encode_field(e, msg, m->UPB_PRIVATE(subs), f); + } + } + } -#define TAGBYTES(card) \ - TYPES(card, 1) \ - TYPES(card, 2) + *size = (e->limit - e->ptr) - pre_len; +} -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) -TAGBYTES(p) +static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, + const upb_Message* const msg, + const upb_MiniTable* const l, + char** const buf, + size_t* const size) { + // Unfortunately we must continue to perform hackery here because there are + // code paths which blindly copy the returned pointer without bothering to + // check for errors until much later (b/235839510). So we still set *buf to + // NULL on error and we still set it to non-NULL on a successful empty result. + if (UPB_SETJMP(encoder->err) == 0) { + encode_message(encoder, msg, l, size); + *size = encoder->limit - encoder->ptr; + if (*size == 0) { + static char ch; + *buf = &ch; + } else { + UPB_ASSERT(encoder->ptr); + *buf = encoder->ptr; + } + } else { + UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); + *buf = NULL; + *size = 0; + } -#undef F -#undef TYPES -#undef TAGBYTES -#undef FASTDECODE_UNPACKEDFIXED -#undef FASTDECODE_PACKEDFIXED + _upb_mapsorter_destroy(&encoder->sorter); + return encoder->status; +} -/* string fields **************************************************************/ +upb_EncodeStatus upb_Encode(const upb_Message* msg, const upb_MiniTable* l, + int options, upb_Arena* arena, char** buf, + size_t* size) { + upb_encstate e; + unsigned depth = (unsigned)options >> 16; -typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - const upb_MiniTable* table, - uint64_t hasbits, - upb_StringView* dst); + e.status = kUpb_EncodeStatus_Ok; + e.arena = arena; + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; + e.options = options; + _upb_mapsorter_init(&e.sorter); -UPB_NOINLINE -static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); - } - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + return upb_Encoder_Encode(&e, msg, l, buf, size); } -#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ - int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ - ptr++; \ - if (size & 0x80) { \ - ptr = fastdecode_longsize(ptr, &size); \ - } \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ - dst->size = 0; \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - const char* s_ptr = ptr; \ - ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ - if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ - dst->data = s_ptr; \ - dst->size = size; \ - \ - if (validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } else { \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - } +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. +// Also the table size grows by 2x. +// +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. +// +// The overall design is to create specialized functions for every possible +// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch +// to the specialized function as quickly as possible. + + + +// Must be last. + +#if UPB_FASTTABLE + +// The standard set of arguments passed to each parsing function. +// Thanks to x86-64 calling conventions, these will stay in registers. +#define UPB_PARSE_PARAMS \ + upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \ + uint64_t hasbits, uint64_t data + +#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. */ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ + return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0); + +typedef enum { + CARD_s = 0, /* Singular (optional, non-repeated) */ + CARD_o = 1, /* Oneof */ + CARD_r = 2, /* Repeated */ + CARD_p = 3 /* Packed Repeated */ +} upb_card; UPB_NOINLINE -static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, - const char* ptr, upb_Message* msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); +static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; + ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline( + &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback); + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); } -UPB_NOINLINE -static const char* fastdecode_longstring_noutf8( - struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data) { - upb_StringView* dst = (upb_StringView*)data; - FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +UPB_FORCEINLINE +static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) { + int overrun; + switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) { + case kUpb_IsDoneStatus_Done: + *(uint32_t*)msg |= hasbits; // Sync hasbits. + const upb_MiniTable* m = decode_totablep(table); + return UPB_UNLIKELY(m->UPB_PRIVATE(required_count)) + ? _upb_Decoder_CheckRequired(d, ptr, msg, m) + : ptr; + case kUpb_IsDoneStatus_NotDone: + break; + case kUpb_IsDoneStatus_NeedFallback: + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); + } + + // Read two bytes of tag data (for a one-byte tag, the high byte is junk). + data = _upb_FastDecoder_LoadTag(ptr); + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, - int copy, char* data, size_t data_offset, - upb_StringView* dst) { - d->arena.UPB_PRIVATE(ptr) += copy; - dst->data = data + data_offset; - UPB_UNPOISON_MEMORY_REGION(data, copy); - memcpy(data, ptr, copy); - UPB_POISON_MEMORY_REGION(data + data_offset + size, - copy - data_offset - size); +static bool fastdecode_checktag(uint16_t data, int tagbytes) { + if (tagbytes == 1) { + return (data & 0xff) == 0; + } else { + return data == 0; + } } -#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - card, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - size_t arena_has; \ - size_t common_has; \ - char* buf; \ - \ - UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (uint8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->size = size; \ - \ - buf = d->arena.UPB_PRIVATE(ptr); \ - arena_has = UPB_PRIVATE(_upb_ArenaHas)(&d->arena); \ - common_has = UPB_MIN(arena_has, \ - upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ - \ - if (UPB_LIKELY(size <= 15 - tagbytes)) { \ - if (arena_has < 16) goto longstr; \ - fastdecode_docopy(d, ptr - tagbytes - 1, size, 16, buf, tagbytes + 1, \ - dst); \ - } else if (UPB_LIKELY(size <= 32)) { \ - if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 32, buf, 0, dst); \ - } else if (UPB_LIKELY(size <= 64)) { \ - if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 64, buf, 0, dst); \ - } else if (UPB_LIKELY(size < 128)) { \ - if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 128, buf, 0, dst); \ - } else { \ - goto longstr; \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - \ - longstr: \ - if (card == CARD_r) { \ - fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ - } \ - ptr--; \ - if (validate_utf8) { \ - UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } else { \ - UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ +UPB_FORCEINLINE +static const char* fastdecode_longsize(const char* ptr, int* size) { + int i; + UPB_ASSERT(*size & 0x80); + *size &= 0xff; + for (i = 0; i < 3; i++) { + ptr++; + size_t byte = (uint8_t)ptr[-1]; + *size += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; } + ptr++; + size_t byte = (uint8_t)ptr[-1]; + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected + // for a 32 bit varint. + if (UPB_UNLIKELY(byte >= 8)) return NULL; + *size += (byte - 1) << 28; + return ptr; +} -#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - \ - if (UPB_UNLIKELY( \ - !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - dst->data = ptr; \ - dst->size = size; \ - ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ - dst->size); \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); - -/* Generate all combinations: - * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ +UPB_FORCEINLINE +static const char* fastdecode_delimited( + upb_Decoder* d, const char* ptr, + upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { + ptr++; -#define s_VALIDATE true -#define b_VALIDATE false + // Sign-extend so varint greater than one byte becomes negative, causing + // fast delimited parse to fail. + int len = (int8_t)ptr[-1]; -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, type##_VALIDATE); \ - } \ - const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - CARD_##card, upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ + if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func, + ctx)) { + // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. + // If it exceeds the buffer limit, limit/limit_ptr will change during + // sub-message parsing, so we need to preserve delta, not limit. + if (UPB_UNLIKELY(len & 0x80)) { + // Size varint >1 byte (length >= 128). + ptr = fastdecode_longsize(ptr, &len); + if (!ptr) { + // Corrupt wire format: size exceeded INT_MAX. + return NULL; + } + } + if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) { + // Corrupt wire format: invalid limit. + return NULL; + } + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len); + ptr = func(&d->input, ptr, ctx); + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } + return ptr; +} -#define UTF8(card, tagbytes) \ - F(card, tagbytes, s) \ - F(card, tagbytes, b) +/* singular, oneof, repeated field handling ***********************************/ -#define TAGBYTES(card) \ - UTF8(card, 1) \ - UTF8(card, 2) +typedef struct { + upb_Array* arr; + void* end; +} fastdecode_arr; -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) +typedef enum { + FD_NEXT_ATLIMIT, + FD_NEXT_SAMEFIELD, + FD_NEXT_OTHERFIELD +} fastdecode_next; -#undef s_VALIDATE -#undef b_VALIDATE -#undef F -#undef TAGBYTES -#undef FASTDECODE_LONGSTRING -#undef FASTDECODE_COPYSTRING -#undef FASTDECODE_STRING +typedef struct { + void* dst; + fastdecode_next next; + uint32_t tag; +} fastdecode_nextret; -/* message fields *************************************************************/ +UPB_FORCEINLINE +static void* fastdecode_resizearr(upb_Decoder* d, void* dst, + fastdecode_arr* farr, int valbytes) { + if (UPB_UNLIKELY(dst == farr->end)) { + size_t old_capacity = farr->arr->UPB_PRIVATE(capacity); + size_t old_bytes = old_capacity * valbytes; + size_t new_capacity = old_capacity * 2; + size_t new_bytes = new_capacity * valbytes; + char* old_ptr = _upb_array_ptr(farr->arr); + char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes); + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + UPB_PRIVATE(_upb_Array_SetTaggedPtr)(farr->arr, new_ptr, elem_size_lg2); + farr->arr->UPB_PRIVATE(capacity) = new_capacity; + dst = (void*)(new_ptr + (old_capacity * valbytes)); + farr->end = (void*)(new_ptr + (new_capacity * valbytes)); + } + return dst; +} -UPB_INLINE -upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* m, - int msg_ceil_bytes) { - size_t size = m->UPB_PRIVATE(size) + sizeof(upb_Message_Internal); - char* msg_data; - if (UPB_LIKELY(msg_ceil_bytes > 0 && - UPB_PRIVATE(_upb_ArenaHas)(&d->arena) >= msg_ceil_bytes)) { - UPB_ASSERT(size <= (size_t)msg_ceil_bytes); - msg_data = d->arena.UPB_PRIVATE(ptr); - d->arena.UPB_PRIVATE(ptr) += size; - UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); - memset(msg_data, 0, msg_ceil_bytes); - UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); +UPB_FORCEINLINE +static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { + if (tagbytes == 1) { + return (uint8_t)tag == (uint8_t)data; } else { - msg_data = (char*)upb_Arena_Malloc(&d->arena, size); - memset(msg_data, 0, size); + return (uint16_t)tag == (uint16_t)data; } - return msg_data + sizeof(upb_Message_Internal); } -typedef struct { - intptr_t table; - upb_Message* msg; -} fastdecode_submsgdata; +UPB_FORCEINLINE +static void fastdecode_commitarr(void* dst, fastdecode_arr* farr, + int valbytes) { + farr->arr->UPB_PRIVATE(size) = + (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes; +} UPB_FORCEINLINE -static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, - const char* ptr, void* ctx) { - upb_Decoder* d = (upb_Decoder*)e; - fastdecode_submsgdata* submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); - UPB_ASSUME(ptr != NULL); - return ptr; +static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst, + const char** ptr, + fastdecode_arr* farr, + uint64_t data, int tagbytes, + int valbytes) { + fastdecode_nextret ret; + dst = (char*)dst + valbytes; + + if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) { + ret.tag = _upb_FastDecoder_LoadTag(*ptr); + if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { + ret.next = FD_NEXT_SAMEFIELD; + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_OTHERFIELD; + } + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_ATLIMIT; + } + + ret.dst = dst; + return ret; } -#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ - msg_ceil_bytes, card) \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("submessage field tag mismatch\n"); \ - } \ - \ - if (--d->depth == 0) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ - } \ - \ - upb_Message** dst; \ - uint32_t submsg_idx = (data >> 16) & 0xff; \ - const upb_MiniTable* tablep = decode_totablep(table); \ - const upb_MiniTable* subtablep = upb_MiniTableSub_Message( \ - *UPB_PRIVATE(_upb_MiniTable_GetSubByIndex)(tablep, submsg_idx)); \ - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ - fastdecode_arr farr; \ - \ - if (subtablep->UPB_PRIVATE(table_mask) == (uint8_t)-1) { \ - d->depth++; \ - RETURN_GENERIC("submessage doesn't have fast tables."); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_Message*), card); \ - \ - if (card == CARD_s) { \ - *(uint32_t*)msg |= hasbits; \ - hasbits = 0; \ - } \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ - } \ - \ - submsg.msg = *dst; \ - \ - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ - } \ - \ - ptr += tagbytes; \ - ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ - \ - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ - } \ - \ - if (card == CARD_r) { \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - d->depth++; \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - d->depth++; \ - return ptr; \ - } \ - } \ - \ - d->depth++; \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ - CARD_##card); \ - } - -#define SIZES(card, tagbytes) \ - F(card, tagbytes, 64, 64) \ - F(card, tagbytes, 128, 128) \ - F(card, tagbytes, 192, 192) \ - F(card, tagbytes, 256, 256) \ - F(card, tagbytes, max, -1) - -#define TAGBYTES(card) \ - SIZES(card, 1) \ - SIZES(card, 2) - -TAGBYTES(s) -TAGBYTES(o) -TAGBYTES(r) - -#undef TAGBYTES -#undef SIZES -#undef F -#undef FASTDECODE_SUBMSG - -#endif /* UPB_FASTTABLE */ - -// We encode backwards, to avoid pre-computing lengths (one-pass encode). - - -#include -#include -#include -#include - - -// Must be last. - -#define UPB_PB_VARINT_MAX_LEN 10 - -UPB_NOINLINE -static size_t encode_varint64(uint64_t val, char* buf) { - size_t i = 0; - do { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } while (val); - return i; -} - -static uint32_t encode_zz32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} -static uint64_t encode_zz64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); +UPB_FORCEINLINE +static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) { + size_t ofs = data >> 48; + return (char*)msg + ofs; } -typedef struct { - upb_EncodeStatus status; - jmp_buf err; - upb_Arena* arena; - char *buf, *ptr, *limit; - int options; - int depth; - _upb_mapsorter sorter; -} upb_encstate; - -static size_t upb_roundup_pow2(size_t bytes) { - size_t ret = 128; - while (ret < bytes) { - ret *= 2; +UPB_FORCEINLINE +static void* fastdecode_getfield(upb_Decoder* d, const char* ptr, + upb_Message* msg, uint64_t* data, + uint64_t* hasbits, fastdecode_arr* farr, + int valbytes, upb_card card) { + switch (card) { + case CARD_s: { + uint8_t hasbit_index = *data >> 24; + // Set hasbit and return pointer to scalar field. + *hasbits |= 1ull << hasbit_index; + return fastdecode_fieldmem(msg, *data); + } + case CARD_o: { + uint16_t case_ofs = *data >> 32; + uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); + uint8_t field_number = *data >> 24; + *oneof_case = field_number; + return fastdecode_fieldmem(msg, *data); + } + case CARD_r: { + // Get pointer to upb_Array and allocate/expand if necessary. + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + upb_Array** arr_p = fastdecode_fieldmem(msg, *data); + char* begin; + *(uint32_t*)msg |= *hasbits; + *hasbits = 0; + if (UPB_LIKELY(!*arr_p)) { + farr->arr = UPB_PRIVATE(_upb_Array_New)(&d->arena, 8, elem_size_lg2); + *arr_p = farr->arr; + } else { + farr->arr = *arr_p; + } + begin = _upb_array_ptr(farr->arr); + farr->end = begin + (farr->arr->UPB_PRIVATE(capacity) * valbytes); + *data = _upb_FastDecoder_LoadTag(ptr); + return begin + (farr->arr->UPB_PRIVATE(size) * valbytes); + } + default: + UPB_UNREACHABLE(); } - return ret; } -UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) { - UPB_ASSERT(s != kUpb_EncodeStatus_Ok); - e->status = s; - UPB_LONGJMP(e->err, 1); +UPB_FORCEINLINE +static bool fastdecode_flippacked(uint64_t* data, int tagbytes) { + *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. + return fastdecode_checktag(*data, tagbytes); } -UPB_NOINLINE -static void encode_growbuffer(upb_encstate* e, size_t bytes) { - size_t old_size = e->limit - e->buf; - size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); - char* new_buf = upb_Arena_Realloc(e->arena, e->buf, old_size, new_size); - - if (!new_buf) encode_err(e, kUpb_EncodeStatus_OutOfMemory); - - // We want previous data at the end, realloc() put it at the beginning. - // TODO: This is somewhat inefficient since we are copying twice. - // Maybe create a realloc() that copies to the end of the new buffer? - if (old_size > 0) { - memmove(new_buf + new_size - old_size, e->buf, old_size); +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ } - e->ptr = new_buf + new_size - (e->limit - e->ptr); - e->limit = new_buf + new_size; - e->buf = new_buf; - - e->ptr -= bytes; -} +/* varint fields **************************************************************/ -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. */ UPB_FORCEINLINE -static void encode_reserve(upb_encstate* e, size_t bytes) { - if ((size_t)(e->ptr - e->buf) < bytes) { - encode_growbuffer(e, bytes); - return; +static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { + if (valbytes == 1) { + return val != 0; + } else if (zigzag) { + if (valbytes == 4) { + uint32_t n = val; + return (n >> 1) ^ -(int32_t)(n & 1); + } else if (valbytes == 8) { + return (val >> 1) ^ -(int64_t)(val & 1); + } + UPB_UNREACHABLE(); } - - e->ptr -= bytes; -} - -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static void encode_bytes(upb_encstate* e, const void* data, size_t len) { - if (len == 0) return; /* memcpy() with zero size is UB */ - encode_reserve(e, len); - memcpy(e->ptr, data, len); -} - -static void encode_fixed64(upb_encstate* e, uint64_t val) { - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, sizeof(uint64_t)); -} - -static void encode_fixed32(upb_encstate* e, uint32_t val) { - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, sizeof(uint32_t)); -} - -UPB_NOINLINE -static void encode_longvarint(upb_encstate* e, uint64_t val) { - size_t len; - char* start; - - encode_reserve(e, UPB_PB_VARINT_MAX_LEN); - len = encode_varint64(val, e->ptr); - start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; - memmove(start, e->ptr, len); - e->ptr = start; + return val; } UPB_FORCEINLINE -static void encode_varint(upb_encstate* e, uint64_t val) { - if (val < 128 && e->ptr != e->buf) { - --e->ptr; - *e->ptr = val; - } else { - encode_longvarint(e, val); +static const char* fastdecode_varint64(const char* ptr, uint64_t* val) { + ptr++; + *val = (uint8_t)ptr[-1]; + if (UPB_UNLIKELY(*val & 0x80)) { + int i; + for (i = 0; i < 8; i++) { + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + *val += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; + } + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + if (byte > 1) { + return NULL; + } + *val += (byte - 1) << 63; } +done: + UPB_ASSUME(ptr != NULL); + return ptr; } -static void encode_double(upb_encstate* e, double d) { - uint64_t u64; - UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); - memcpy(&u64, &d, sizeof(uint64_t)); - encode_fixed64(e, u64); -} - -static void encode_float(upb_encstate* e, float d) { - uint32_t u32; - UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); - memcpy(&u32, &d, sizeof(uint32_t)); - encode_fixed32(e, u32); -} - -static void encode_tag(upb_encstate* e, uint32_t field_number, - uint8_t wire_type) { - encode_varint(e, (field_number << 3) | wire_type); -} +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + val = fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); -static void encode_fixedarray(upb_encstate* e, const upb_Array* arr, - size_t elem_size, uint32_t tag) { - size_t bytes = arr->UPB_PRIVATE(size) * elem_size; - const char* data = _upb_array_constptr(arr); - const char* ptr = data + bytes - elem_size; +typedef struct { + uint8_t valbytes; + bool zigzag; + void* dst; + fastdecode_arr farr; +} fastdecode_varintdata; - if (tag || !_upb_IsLittleEndian()) { - while (true) { - if (elem_size == 4) { - uint32_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap32(val); - encode_bytes(e, &val, elem_size); - } else { - UPB_ASSERT(elem_size == 8); - uint64_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap64(val); - encode_bytes(e, &val, elem_size); - } +UPB_FORCEINLINE +static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_varintdata* data = ctx; + void* dst = data->dst; + uint64_t val; - if (tag) encode_varint(e, tag); - if (ptr == data) break; - ptr -= elem_size; - } - } else { - encode_bytes(e, data, bytes); + while (!_upb_Decoder_IsDone(d, &ptr)) { + dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); + ptr = fastdecode_varint64(ptr, &val); + if (ptr == NULL) return NULL; + val = fastdecode_munge(val, data->valbytes, data->zigzag); + memcpy(dst, &val, data->valbytes); + dst = (char*)dst + data->valbytes; } -} -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size); - -static void encode_TaggedMessagePtr(upb_encstate* e, - upb_TaggedMessagePtr tagged, - const upb_MiniTable* m, size_t* size) { - if (upb_TaggedMessagePtr_IsEmpty(tagged)) { - m = UPB_PRIVATE(_upb_MiniTable_Empty)(); - } - encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), m, size); + fastdecode_commitarr(dst, &data->farr, data->valbytes); + return ptr; } -static void encode_scalar(upb_encstate* e, const void* _field_mem, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const char* field_mem = _field_mem; - int wire_type; - -#define CASE(ctype, type, wtype, encodeval) \ - { \ - ctype val = *(ctype*)field_mem; \ - encode_##type(e, encodeval); \ - wire_type = wtype; \ - break; \ - } +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - CASE(double, double, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Float: - CASE(float, float, kUpb_WireType_32Bit, val); - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - CASE(uint64_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_UInt32: - CASE(uint32_t, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val); - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val); - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val); - case kUpb_FieldType_Bool: - CASE(bool, varint, kUpb_WireType_Varint, val); - case kUpb_FieldType_SInt32: - CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val)); - case kUpb_FieldType_SInt64: - CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - upb_StringView view = *(upb_StringView*)field_mem; - encode_bytes(e, view.data, view.size); - encode_varint(e, view.size); - wire_type = kUpb_WireType_Delimited; - break; - } - case kUpb_FieldType_Group: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, submsg, subm, &size); - wire_type = kUpb_WireType_StartGroup; - e->depth++; - break; - } - case kUpb_FieldType_Message: { - size_t size; - upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem; - const upb_MiniTable* subm = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - if (submsg == 0) { - return; - } - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - encode_TaggedMessagePtr(e, submsg, subm, &size); - encode_varint(e, size); - wire_type = kUpb_WireType_Delimited; - e->depth++; - break; - } - default: - UPB_UNREACHABLE(); +#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ } -#undef CASE - encode_tag(e, f->UPB_PRIVATE(number), wire_type); -} +#define z_ZZ true +#define b_ZZ false +#define v_ZZ false -static void encode_array(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Array* arr = *UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), upb_Array*); - bool packed = upb_MiniTableField_IsPacked(f); - size_t pre_len = e->limit - e->ptr; +/* Generate all combinations: + * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ - if (arr == NULL || arr->UPB_PRIVATE(size) == 0) { - return; +#define F(card, type, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ } -#define VARINT_CASE(ctype, encode) \ - { \ - const ctype* start = _upb_array_constptr(arr); \ - const ctype* ptr = start + arr->UPB_PRIVATE(size); \ - uint32_t tag = \ - packed ? 0 : (f->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint; \ - do { \ - ptr--; \ - encode_varint(e, encode); \ - if (tag) encode_varint(e, tag); \ - } while (ptr != start); \ - } \ - break; +#define TYPES(card, tagbytes) \ + F(card, b, 1, tagbytes) \ + F(card, v, 4, tagbytes) \ + F(card, v, 8, tagbytes) \ + F(card, z, 4, tagbytes) \ + F(card, z, 8, tagbytes) -#define TAG(wire_type) (packed ? 0 : (f->UPB_PRIVATE(number) << 3 | wire_type)) +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) - switch (f->UPB_PRIVATE(descriptortype)) { - case kUpb_FieldType_Double: - encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Float: - encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_SFixed64: - case kUpb_FieldType_Fixed64: - encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit)); - break; - case kUpb_FieldType_Fixed32: - case kUpb_FieldType_SFixed32: - encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit)); - break; - case kUpb_FieldType_Int64: - case kUpb_FieldType_UInt64: - VARINT_CASE(uint64_t, *ptr); - case kUpb_FieldType_UInt32: - VARINT_CASE(uint32_t, *ptr); - case kUpb_FieldType_Int32: - case kUpb_FieldType_Enum: - VARINT_CASE(int32_t, (int64_t)*ptr); - case kUpb_FieldType_Bool: - VARINT_CASE(bool, *ptr); - case kUpb_FieldType_SInt32: - VARINT_CASE(int32_t, encode_zz32(*ptr)); - case kUpb_FieldType_SInt64: - VARINT_CASE(int64_t, encode_zz64(*ptr)); - case kUpb_FieldType_String: - case kUpb_FieldType_Bytes: { - const upb_StringView* start = _upb_array_constptr(arr); - const upb_StringView* ptr = start + arr->UPB_PRIVATE(size); - do { - ptr--; - encode_bytes(e, ptr->data, ptr->size); - encode_varint(e, ptr->size); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); - } while (ptr != start); - return; - } - case kUpb_FieldType_Group: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->UPB_PRIVATE(size); - const upb_MiniTable* subm = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_EndGroup); - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_StartGroup); - } while (ptr != start); - e->depth++; - return; - } - case kUpb_FieldType_Message: { - const upb_TaggedMessagePtr* start = _upb_array_constptr(arr); - const upb_TaggedMessagePtr* ptr = start + arr->UPB_PRIVATE(size); - const upb_MiniTable* subm = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded); - do { - size_t size; - ptr--; - encode_TaggedMessagePtr(e, *ptr, subm, &size); - encode_varint(e, size); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); - } while (ptr != start); - e->depth++; - return; - } +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef z_ZZ +#undef b_ZZ +#undef v_ZZ +#undef o_ONEOF +#undef s_ONEOF +#undef r_ONEOF +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT + +/* fixed fields ***************************************************************/ + +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void* dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ + &d->input, ptr, size) || \ + (size % valbytes) != 0)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + upb_Array** arr_p = fastdecode_fieldmem(msg, data); \ + upb_Array* arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = \ + UPB_PRIVATE(_upb_Array_New)(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + } else { \ + _upb_Array_ResizeUninitialized(arr, elems, &d->arena); \ + } \ + \ + char* dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->UPB_PRIVATE(size) = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ } -#undef VARINT_CASE - if (packed) { - encode_varint(e, e->limit - e->ptr - pre_len); - encode_tag(e, f->UPB_PRIVATE(number), kUpb_WireType_Delimited); +/* Generate all combinations: + * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ + +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ } -} -static void encode_mapentry(upb_encstate* e, uint32_t number, - const upb_MiniTable* layout, - const upb_MapEntry* ent) { - const upb_MiniTableField* key_field = &layout->UPB_PRIVATE(fields)[0]; - const upb_MiniTableField* val_field = &layout->UPB_PRIVATE(fields)[1]; - size_t pre_len = e->limit - e->ptr; - size_t size; - encode_scalar(e, &ent->data.v, layout->UPB_PRIVATE(subs), val_field); - encode_scalar(e, &ent->data.k, layout->UPB_PRIVATE(subs), key_field); - size = (e->limit - e->ptr) - pre_len; - encode_varint(e, size); - encode_tag(e, number, kUpb_WireType_Delimited); -} +#define TYPES(card, tagbytes) \ + F(card, 4, tagbytes) \ + F(card, 8, tagbytes) -static void encode_map(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - const upb_Map* map = *UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), const upb_Map*); - const upb_MiniTable* layout = - upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]); - UPB_ASSERT(layout->UPB_PRIVATE(field_count) == 2); +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) - if (map == NULL) return; +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushmap( - &e->sorter, layout->UPB_PRIVATE(fields)[0].UPB_PRIVATE(descriptortype), - map, &sorted); - upb_MapEntry ent; - while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { - encode_mapentry(e, f->UPB_PRIVATE(number), layout, &ent); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - intptr_t iter = UPB_STRTABLE_BEGIN; - upb_StringView key; - upb_value val; - while (upb_strtable_next2(&map->table, &key, &val, &iter)) { - upb_MapEntry ent; - _upb_map_fromkey(key, &ent.data.k, map->key_size); - _upb_map_fromvalue(val, &ent.data.v, map->val_size); - encode_mapentry(e, f->UPB_PRIVATE(number), layout, &ent); - } - } -} +#undef F +#undef TYPES +#undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED -static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* f) { - if (f->presence == 0) { - // Proto3 presence or map/array. - const void* mem = UPB_PTR_AT(msg, f->UPB_PRIVATE(offset), void); - switch (UPB_PRIVATE(_upb_MiniTableField_GetRep)(f)) { - case kUpb_FieldRep_1Byte: { - char ch; - memcpy(&ch, mem, 1); - return ch != 0; - } - case kUpb_FieldRep_4Byte: { - uint32_t u32; - memcpy(&u32, mem, 4); - return u32 != 0; - } - case kUpb_FieldRep_8Byte: { - uint64_t u64; - memcpy(&u64, mem, 8); - return u64 != 0; - } - case kUpb_FieldRep_StringView: { - const upb_StringView* str = (const upb_StringView*)mem; - return str->size != 0; - } - default: - UPB_UNREACHABLE(); - } - } else if (f->presence > 0) { - // Proto2 presence: hasbit. - return UPB_PRIVATE(_upb_Message_GetHasbit)(msg, f); - } else { - // Field is in a oneof. - return UPB_PRIVATE(_upb_Message_GetOneofCase)(msg, f) == - f->UPB_PRIVATE(number); +/* string fields **************************************************************/ + +typedef const char* fastdecode_copystr_func(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + const upb_MiniTable* table, + uint64_t hasbits, + upb_StringView* dst); + +UPB_NOINLINE +static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); } + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); } -static void encode_field(upb_encstate* e, const upb_Message* msg, - const upb_MiniTableSub* subs, - const upb_MiniTableField* field) { - switch (UPB_PRIVATE(_upb_MiniTableField_Mode)(field)) { - case kUpb_FieldMode_Array: - encode_array(e, msg, subs, field); - break; - case kUpb_FieldMode_Map: - encode_map(e, msg, subs, field); - break; - case kUpb_FieldMode_Scalar: - encode_scalar(e, UPB_PTR_AT(msg, field->UPB_PRIVATE(offset), void), subs, - field); - break; - default: - UPB_UNREACHABLE(); +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ + dst->size = 0; \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + const char* s_ptr = ptr; \ + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ + if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ + dst->data = s_ptr; \ + dst->size = size; \ + \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ } + +UPB_NOINLINE +static const char* fastdecode_longstring_utf8(struct upb_Decoder* d, + const char* ptr, upb_Message* msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); } -static void encode_msgset_item(upb_encstate* e, const upb_Extension* ext) { - size_t size; - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup); - encode_message(e, ext->data.ptr, - upb_MiniTableExtension_GetSubMessage(ext->ext), &size); - encode_varint(e, size); - encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited); - encode_varint(e, upb_MiniTableExtension_Number(ext->ext)); - encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint); - encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); +UPB_NOINLINE +static const char* fastdecode_longstring_noutf8( + struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_StringView* dst = (upb_StringView*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); +} + +UPB_FORCEINLINE +static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, + int copy, char* data, size_t data_offset, + upb_StringView* dst) { + d->arena.UPB_PRIVATE(ptr) += copy; + dst->data = data + data_offset; + UPB_UNPOISON_MEMORY_REGION(data, copy); + memcpy(data, ptr, copy); + UPB_POISON_MEMORY_REGION(data + data_offset + size, + copy - data_offset - size); } -static void encode_ext(upb_encstate* e, const upb_Extension* ext, - bool is_message_set) { - if (UPB_UNLIKELY(is_message_set)) { - encode_msgset_item(e, ext); - } else { - encode_field(e, (upb_Message*)&ext->data, &ext->ext->UPB_PRIVATE(sub), - &ext->ext->UPB_PRIVATE(field)); +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char* buf; \ + \ + UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.UPB_PRIVATE(ptr); \ + arena_has = UPB_PRIVATE(_upb_ArenaHas)(&d->arena); \ + common_has = UPB_MIN(arena_has, \ + upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) goto longstr; \ + fastdecode_docopy(d, ptr - tagbytes - 1, size, 16, buf, tagbytes + 1, \ + dst); \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, 0, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, 0, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, 0, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + if (card == CARD_r) { \ + fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \ + } \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + dst->data = ptr; \ + dst->size = size; \ + ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ + dst->size); \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +/* Generate all combinations: + * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ + +#define s_VALIDATE true +#define b_VALIDATE false + +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, type##_VALIDATE); \ + } \ + const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ } -} -static void encode_message(upb_encstate* e, const upb_Message* msg, - const upb_MiniTable* m, size_t* size) { - size_t pre_len = e->limit - e->ptr; +#define UTF8(card, tagbytes) \ + F(card, tagbytes, s) \ + F(card, tagbytes, b) - if ((e->options & kUpb_EncodeOption_CheckRequired) && - m->UPB_PRIVATE(required_count)) { - uint64_t msg_head; - memcpy(&msg_head, msg, 8); - msg_head = _upb_BigEndian_Swap64(msg_head); - if (UPB_PRIVATE(_upb_MiniTable_RequiredMask)(m) & ~msg_head) { - encode_err(e, kUpb_EncodeStatus_MissingRequired); - } - } +#define TAGBYTES(card) \ + UTF8(card, 1) \ + UTF8(card, 2) - if ((e->options & kUpb_EncodeOption_SkipUnknown) == 0) { - size_t unknown_size; - const char* unknown = upb_Message_GetUnknown(msg, &unknown_size); +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) - if (unknown) { - encode_bytes(e, unknown, unknown_size); - } - } +#undef s_VALIDATE +#undef b_VALIDATE +#undef F +#undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING - if (m->UPB_PRIVATE(ext) != kUpb_ExtMode_NonExtendable) { - /* Encode all extensions together. Unlike C++, we do not attempt to keep - * these in field number order relative to normal fields or even to each - * other. */ - size_t ext_count; - const upb_Extension* ext = - UPB_PRIVATE(_upb_Message_Getexts)(msg, &ext_count); - if (ext_count) { - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); - while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { - encode_ext(e, ext, m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - const upb_Extension* end = ext + ext_count; - for (; ext != end; ext++) { - encode_ext(e, ext, m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); - } - } - } - } +/* message fields *************************************************************/ - if (m->UPB_PRIVATE(field_count)) { - const upb_MiniTableField* f = - &m->UPB_PRIVATE(fields)[m->UPB_PRIVATE(field_count)]; - const upb_MiniTableField* first = &m->UPB_PRIVATE(fields)[0]; - while (f != first) { - f--; - if (encode_shouldencode(e, msg, m->UPB_PRIVATE(subs), f)) { - encode_field(e, msg, m->UPB_PRIVATE(subs), f); - } - } +UPB_INLINE +upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* m, + int msg_ceil_bytes) { + size_t size = m->UPB_PRIVATE(size) + sizeof(upb_Message_Internal); + char* msg_data; + if (UPB_LIKELY(msg_ceil_bytes > 0 && + UPB_PRIVATE(_upb_ArenaHas)(&d->arena) >= msg_ceil_bytes)) { + UPB_ASSERT(size <= (size_t)msg_ceil_bytes); + msg_data = d->arena.UPB_PRIVATE(ptr); + d->arena.UPB_PRIVATE(ptr) += size; + UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); + memset(msg_data, 0, msg_ceil_bytes); + UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); + } else { + msg_data = (char*)upb_Arena_Malloc(&d->arena, size); + memset(msg_data, 0, size); } + return msg_data + sizeof(upb_Message_Internal); +} - *size = (e->limit - e->ptr) - pre_len; +typedef struct { + intptr_t table; + upb_Message* msg; +} fastdecode_submsgdata; + +UPB_FORCEINLINE +static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, + const char* ptr, void* ctx) { + upb_Decoder* d = (upb_Decoder*)e; + fastdecode_submsgdata* submsg = ctx; + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); + UPB_ASSUME(ptr != NULL); + return ptr; } -static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder, - const upb_Message* const msg, - const upb_MiniTable* const l, - char** const buf, - size_t* const size) { - // Unfortunately we must continue to perform hackery here because there are - // code paths which blindly copy the returned pointer without bothering to - // check for errors until much later (b/235839510). So we still set *buf to - // NULL on error and we still set it to non-NULL on a successful empty result. - if (UPB_SETJMP(encoder->err) == 0) { - encode_message(encoder, msg, l, size); - *size = encoder->limit - encoder->ptr; - if (*size == 0) { - static char ch; - *buf = &ch; - } else { - UPB_ASSERT(encoder->ptr); - *buf = encoder->ptr; - } - } else { - UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok); - *buf = NULL; - *size = 0; +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded); \ + } \ + \ + upb_Message** dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_MiniTable* tablep = decode_totablep(table); \ + const upb_MiniTable* subtablep = upb_MiniTableSub_Message( \ + *UPB_PRIVATE(_upb_MiniTable_GetSubByIndex)(tablep, submsg_idx)); \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if (subtablep->UPB_PRIVATE(table_mask) == (uint8_t)-1) { \ + d->depth++; \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_Message*), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t*)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } - _upb_mapsorter_destroy(&encoder->sorter); - return encoder->status; -} +#define SIZES(card, tagbytes) \ + F(card, tagbytes, 64, 64) \ + F(card, tagbytes, 128, 128) \ + F(card, tagbytes, 192, 192) \ + F(card, tagbytes, 256, 256) \ + F(card, tagbytes, max, -1) -upb_EncodeStatus upb_Encode(const upb_Message* msg, const upb_MiniTable* l, - int options, upb_Arena* arena, char** buf, - size_t* size) { - upb_encstate e; - unsigned depth = (unsigned)options >> 16; +#define TAGBYTES(card) \ + SIZES(card, 1) \ + SIZES(card, 2) - e.status = kUpb_EncodeStatus_Ok; - e.arena = arena; - e.buf = NULL; - e.limit = NULL; - e.ptr = NULL; - e.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; - e.options = options; - _upb_mapsorter_init(&e.sorter); +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef TAGBYTES +#undef SIZES +#undef F +#undef FASTDECODE_SUBMSG + +#endif /* UPB_FASTTABLE */ - return upb_Encoder_Encode(&e, msg, l, buf, size); -} +#include +#include // Must be last. -UPB_NOINLINE _upb_WireReader_ReadLongVarintRet -_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { - _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; +UPB_NOINLINE UPB_PRIVATE(_upb_WireReader_LongVarint) + UPB_PRIVATE(_upb_WireReader_ReadLongVarint)(const char* ptr, uint64_t val) { + UPB_PRIVATE(_upb_WireReader_LongVarint) ret = {NULL, 0}; uint64_t byte; int i; for (i = 1; i < 10; i++) { @@ -15289,9 +15292,9 @@ _upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { return ret; } -const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream) { +const char* UPB_PRIVATE(_upb_WireReader_SkipGroup)( + const char* ptr, uint32_t tag, int depth_limit, + upb_EpsCopyInputStream* stream) { if (--depth_limit == 0) return NULL; uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index 005e2349c286..d6444164c49b 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -1356,8 +1356,9 @@ UPB_INLINE const upb_MiniTable* UPB_PRIVATE(_upb_MiniTableSub_Message)( // Must be last. struct upb_Decoder; +struct upb_Message; typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, + struct upb_Message* msg, intptr_t table, uint64_t hasbits, uint64_t data); typedef struct { uint64_t field_data; @@ -4006,6 +4007,72 @@ UPB_API upb_DecodeStatus upb_Decode(const char* buf, size_t size, #endif /* UPB_WIRE_DECODE_H_ */ +// upb_Encode: parsing from a upb_Message using a upb_MiniTable. + +#ifndef UPB_WIRE_ENCODE_H_ +#define UPB_WIRE_ENCODE_H_ + +#include +#include + + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + /* If set, the results of serializing will be deterministic across all + * instances of this binary. There are no guarantees across different + * binary builds. + * + * If your proto contains maps, the encoder will need to malloc()/free() + * memory during encode. */ + kUpb_EncodeOption_Deterministic = 1, + + // When set, unknown fields are not printed. + kUpb_EncodeOption_SkipUnknown = 2, + + // When set, the encode will fail if any required fields are missing. + kUpb_EncodeOption_CheckRequired = 4, +}; + +typedef enum { + kUpb_EncodeStatus_Ok = 0, + kUpb_EncodeStatus_OutOfMemory = 1, // Arena alloc failed + kUpb_EncodeStatus_MaxDepthExceeded = 2, + + // kUpb_EncodeOption_CheckRequired failed but the parse otherwise succeeded. + kUpb_EncodeStatus_MissingRequired = 3, +} upb_EncodeStatus; + +UPB_INLINE uint32_t upb_EncodeOptions_MaxDepth(uint16_t depth) { + return (uint32_t)depth << 16; +} + +UPB_INLINE uint16_t upb_EncodeOptions_GetMaxDepth(uint32_t options) { + return options >> 16; +} + +// Enforce an upper bound on recursion depth. +UPB_INLINE int upb_Encode_LimitDepth(uint32_t encode_options, uint32_t limit) { + uint32_t max_depth = upb_EncodeOptions_GetMaxDepth(encode_options); + if (max_depth > limit) max_depth = limit; + return upb_EncodeOptions_MaxDepth(max_depth) | (encode_options & 0xffff); +} + +UPB_API upb_EncodeStatus upb_Encode(const upb_Message* msg, + const upb_MiniTable* l, int options, + upb_Arena* arena, char** buf, size_t* size); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_WIRE_ENCODE_H_ */ + // These are the specialized field parser functions for the fast parser. // Generated tables will refer to these by name. // @@ -4040,8 +4107,8 @@ UPB_API upb_DecodeStatus upb_Decode(const char* buf, size_t size, // - '1' for one-byte tags (field numbers 1-15) // - '2' for two-byte tags (field numbers 16-2048) -#ifndef UPB_WIRE_DECODE_FAST_H_ -#define UPB_WIRE_DECODE_FAST_H_ +#ifndef UPB_WIRE_DECODE_INTERNAL_FAST_H_ +#define UPB_WIRE_DECODE_INTERNAL_FAST_H_ // Must be last. @@ -4142,73 +4209,7 @@ TAGBYTES(r) #endif -#endif /* UPB_WIRE_DECODE_FAST_H_ */ - -// upb_Encode: parsing from a upb_Message using a upb_MiniTable. - -#ifndef UPB_WIRE_ENCODE_H_ -#define UPB_WIRE_ENCODE_H_ - -#include -#include - - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - /* If set, the results of serializing will be deterministic across all - * instances of this binary. There are no guarantees across different - * binary builds. - * - * If your proto contains maps, the encoder will need to malloc()/free() - * memory during encode. */ - kUpb_EncodeOption_Deterministic = 1, - - // When set, unknown fields are not printed. - kUpb_EncodeOption_SkipUnknown = 2, - - // When set, the encode will fail if any required fields are missing. - kUpb_EncodeOption_CheckRequired = 4, -}; - -typedef enum { - kUpb_EncodeStatus_Ok = 0, - kUpb_EncodeStatus_OutOfMemory = 1, // Arena alloc failed - kUpb_EncodeStatus_MaxDepthExceeded = 2, - - // kUpb_EncodeOption_CheckRequired failed but the parse otherwise succeeded. - kUpb_EncodeStatus_MissingRequired = 3, -} upb_EncodeStatus; - -UPB_INLINE uint32_t upb_EncodeOptions_MaxDepth(uint16_t depth) { - return (uint32_t)depth << 16; -} - -UPB_INLINE uint16_t upb_EncodeOptions_GetMaxDepth(uint32_t options) { - return options >> 16; -} - -// Enforce an upper bound on recursion depth. -UPB_INLINE int upb_Encode_LimitDepth(uint32_t encode_options, uint32_t limit) { - uint32_t max_depth = upb_EncodeOptions_GetMaxDepth(encode_options); - if (max_depth > limit) max_depth = limit; - return upb_EncodeOptions_MaxDepth(max_depth) | (encode_options & 0xffff); -} - -UPB_API upb_EncodeStatus upb_Encode(const upb_Message* msg, - const upb_MiniTable* l, int options, - upb_Arena* arena, char** buf, size_t* size); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_WIRE_ENCODE_H_ */ +#endif /* UPB_WIRE_DECODE_INTERNAL_FAST_H_ */ // IWYU pragma: end_exports #endif // UPB_GENERATED_CODE_SUPPORT_H_ @@ -13533,8 +13534,8 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { #endif /* UPB_WIRE_INTERNAL_DECODE_H_ */ -#ifndef UPB_WIRE_INTERNAL_SWAP_H_ -#define UPB_WIRE_INTERNAL_SWAP_H_ +#ifndef UPB_WIRE_INTERNAL_ENDIAN_H_ +#define UPB_WIRE_INTERNAL_ENDIAN_H_ #include @@ -13544,23 +13545,23 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) { extern "C" { #endif -UPB_INLINE bool _upb_IsLittleEndian(void) { - int x = 1; +UPB_INLINE bool UPB_PRIVATE(_upb_IsLittleEndian)(void) { + const int x = 1; return *(char*)&x == 1; } -UPB_INLINE uint32_t _upb_BigEndian_Swap32(uint32_t val) { - if (_upb_IsLittleEndian()) return val; +UPB_INLINE uint32_t UPB_PRIVATE(_upb_BigEndian32)(uint32_t val) { + if (UPB_PRIVATE(_upb_IsLittleEndian)()) return val; return ((val & 0xff) << 24) | ((val & 0xff00) << 8) | ((val & 0xff0000) >> 8) | ((val & 0xff000000) >> 24); } -UPB_INLINE uint64_t _upb_BigEndian_Swap64(uint64_t val) { - if (_upb_IsLittleEndian()) return val; +UPB_INLINE uint64_t UPB_PRIVATE(_upb_BigEndian64)(uint64_t val) { + if (UPB_PRIVATE(_upb_IsLittleEndian)()) return val; - return ((uint64_t)_upb_BigEndian_Swap32((uint32_t)val) << 32) | - _upb_BigEndian_Swap32((uint32_t)(val >> 32)); + return ((uint64_t)UPB_PRIVATE(_upb_BigEndian32)((uint32_t)val) << 32) | + UPB_PRIVATE(_upb_BigEndian32)((uint32_t)(val >> 32)); } #ifdef __cplusplus @@ -13568,62 +13569,42 @@ UPB_INLINE uint64_t _upb_BigEndian_Swap64(uint64_t val) { #endif -#endif /* UPB_WIRE_INTERNAL_SWAP_H_ */ +#endif /* UPB_WIRE_INTERNAL_ENDIAN_H_ */ #ifndef UPB_WIRE_READER_H_ #define UPB_WIRE_READER_H_ -#ifndef UPB_WIRE_TYPES_H_ -#define UPB_WIRE_TYPES_H_ - -// A list of types as they are encoded on the wire. -typedef enum { - kUpb_WireType_Varint = 0, - kUpb_WireType_64Bit = 1, - kUpb_WireType_Delimited = 2, - kUpb_WireType_StartGroup = 3, - kUpb_WireType_EndGroup = 4, - kUpb_WireType_32Bit = 5 -} upb_WireType; - -#endif /* UPB_WIRE_TYPES_H_ */ +#ifndef UPB_WIRE_INTERNAL_READER_H_ +#define UPB_WIRE_INTERNAL_READER_H_ // Must be last. -#ifdef __cplusplus -extern "C" { -#endif - -// The upb_WireReader interface is suitable for general-purpose parsing of -// protobuf binary wire format. It is designed to be used along with -// upb_EpsCopyInputStream for buffering, and all parsing routines in this file -// assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is -// available to read without any bounds checks. - -#define kUpb_WireReader_WireTypeMask 7 #define kUpb_WireReader_WireTypeBits 3 +#define kUpb_WireReader_WireTypeMask 7 typedef struct { const char* ptr; uint64_t val; -} _upb_WireReader_ReadLongVarintRet; +} UPB_PRIVATE(_upb_WireReader_LongVarint); -_upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint( - const char* ptr, uint64_t val); +#ifdef __cplusplus +extern "C" { +#endif + +UPB_PRIVATE(_upb_WireReader_LongVarint) +UPB_PRIVATE(_upb_WireReader_ReadLongVarint)(const char* ptr, uint64_t val); -static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr, - uint64_t* val, - int maxlen, - uint64_t maxval) { +static UPB_FORCEINLINE const char* UPB_PRIVATE(_upb_WireReader_ReadVarint)( + const char* ptr, uint64_t* val, int maxlen, uint64_t maxval) { uint64_t byte = (uint8_t)*ptr; if (UPB_LIKELY((byte & 0x80) == 0)) { *val = (uint32_t)byte; return ptr + 1; } const char* start = ptr; - _upb_WireReader_ReadLongVarintRet res = - _upb_WireReader_ReadLongVarint(ptr, byte); + UPB_PRIVATE(_upb_WireReader_LongVarint) + res = UPB_PRIVATE(_upb_WireReader_ReadLongVarint)(ptr, byte); if (!res.ptr || (maxlen < 10 && res.ptr - start > maxlen) || res.val > maxval) { return NULL; // Malformed. @@ -13632,6 +13613,48 @@ static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr, return res.ptr; } +UPB_INLINE uint32_t UPB_PRIVATE(_upb_WireReader_GetFieldNumber)(uint32_t tag) { + return tag >> kUpb_WireReader_WireTypeBits; +} + +UPB_INLINE uint8_t UPB_PRIVATE(_upb_WireReader_GetWireType)(uint32_t tag) { + return tag & kUpb_WireReader_WireTypeMask; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif // UPB_WIRE_INTERNAL_READER_H_ + +#ifndef UPB_WIRE_TYPES_H_ +#define UPB_WIRE_TYPES_H_ + +// A list of types as they are encoded on the wire. +typedef enum { + kUpb_WireType_Varint = 0, + kUpb_WireType_64Bit = 1, + kUpb_WireType_Delimited = 2, + kUpb_WireType_StartGroup = 3, + kUpb_WireType_EndGroup = 4, + kUpb_WireType_32Bit = 5 +} upb_WireType; + +#endif /* UPB_WIRE_TYPES_H_ */ + +// Must be last. + +// The upb_WireReader interface is suitable for general-purpose parsing of +// protobuf binary wire format. It is designed to be used along with +// upb_EpsCopyInputStream for buffering, and all parsing routines in this file +// assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is +// available to read without any bounds checks. + +#ifdef __cplusplus +extern "C" { +#endif + // Parses a tag into `tag`, and returns a pointer past the end of the tag, or // NULL if there was an error in the tag data. // @@ -13641,7 +13664,7 @@ static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr, static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr, uint32_t* tag) { uint64_t val; - ptr = _upb_WireReader_ReadVarint(ptr, &val, 5, UINT32_MAX); + ptr = UPB_PRIVATE(_upb_WireReader_ReadVarint)(ptr, &val, 5, UINT32_MAX); if (!ptr) return NULL; *tag = val; return ptr; @@ -13649,17 +13672,17 @@ static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr, // Given a tag, returns the field number. UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) { - return tag >> kUpb_WireReader_WireTypeBits; + return UPB_PRIVATE(_upb_WireReader_GetFieldNumber)(tag); } // Given a tag, returns the wire type. UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) { - return tag & kUpb_WireReader_WireTypeMask; + return UPB_PRIVATE(_upb_WireReader_GetWireType)(tag); } UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr, uint64_t* val) { - return _upb_WireReader_ReadVarint(ptr, val, 10, UINT64_MAX); + return UPB_PRIVATE(_upb_WireReader_ReadVarint)(ptr, val, 10, UINT64_MAX); } // Skips data for a varint, returning a pointer past the end of the varint, or @@ -13695,7 +13718,7 @@ UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) { UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) { uint32_t uval; memcpy(&uval, ptr, 4); - uval = _upb_BigEndian_Swap32(uval); + uval = UPB_PRIVATE(_upb_BigEndian32)(uval); memcpy(val, &uval, 4); return ptr + 4; } @@ -13708,14 +13731,14 @@ UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) { UPB_INLINE const char* upb_WireReader_ReadFixed64(const char* ptr, void* val) { uint64_t uval; memcpy(&uval, ptr, 8); - uval = _upb_BigEndian_Swap64(uval); + uval = UPB_PRIVATE(_upb_BigEndian64)(uval); memcpy(val, &uval, 8); return ptr + 8; } -const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream); +const char* UPB_PRIVATE(_upb_WireReader_SkipGroup)( + const char* ptr, uint32_t tag, int depth_limit, + upb_EpsCopyInputStream* stream); // Skips data for a group, returning a pointer past the end of the group, or // NULL if there was an error parsing the group. The `tag` argument should be @@ -13728,7 +13751,7 @@ const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, // control over this? UPB_INLINE const char* upb_WireReader_SkipGroup( const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) { - return _upb_WireReader_SkipGroup(ptr, tag, 100, stream); + return UPB_PRIVATE(_upb_WireReader_SkipGroup)(ptr, tag, 100, stream); } UPB_INLINE const char* _upb_WireReader_SkipValue( @@ -13749,7 +13772,8 @@ UPB_INLINE const char* _upb_WireReader_SkipValue( return ptr; } case kUpb_WireType_StartGroup: - return _upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream); + return UPB_PRIVATE(_upb_WireReader_SkipGroup)(ptr, tag, depth_limit, + stream); case kUpb_WireType_EndGroup: return NULL; // Should be handled before now. default: diff --git a/upb/cmake/CMakeLists.txt b/upb/cmake/CMakeLists.txt index 61d3650fdfcb..6374432ba59d 100644 --- a/upb/cmake/CMakeLists.txt +++ b/upb/cmake/CMakeLists.txt @@ -96,8 +96,7 @@ target_link_libraries(generated_code_support__only_for_generated_code_do_not_use message_internal mini_descriptor mini_table - wire - wire_internal) + wire) add_library(generated_cpp_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE