Skip to content

Commit

Permalink
Support streams to/from arrays of wide data (verilator#5334)
Browse files Browse the repository at this point in the history
  • Loading branch information
kozdra authored Aug 6, 2024
1 parent 3426ee5 commit e6fe367
Show file tree
Hide file tree
Showing 4 changed files with 305 additions and 57 deletions.
304 changes: 259 additions & 45 deletions include/verilated_funcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1585,71 +1585,184 @@ static inline WDataOutP VL_STREAML_WWI(int lbits, WDataOutP owp, WDataInP const
return owp;
}

template <typename T>
static inline IData VL_PACK_II(int obits, int lbits, const VlQueue<T>& q) {
// Pack a queue of CData elements into a single IData.
// Element i is OR-ed in at bit offset i * lbits, so q.at(0) occupies the lowest bits.
// obits is not read by this overload.
// The queue length is a run-time value: elements whose offset would reach the width
// of IData are ignored, because shifting by that amount is undefined behavior.
static inline IData VL_PACK_II(int obits, int lbits, const VlQueue<CData>& q) {
    constexpr size_t retBits = 8 * sizeof(IData);
    IData ret = 0;
    for (size_t i = 0; i < q.size() && i * lbits < retBits; ++i)
        ret |= static_cast<IData>(q.at(i)) << (i * lbits);
    return ret;
}

// Pack a queue of SData elements into a single IData.
// Element i is OR-ed in at bit offset i * lbits, so q.at(0) occupies the lowest bits.
// obits is not read by this overload.
// The queue length is a run-time value: elements whose offset would reach the width
// of IData are ignored, because shifting by that amount is undefined behavior.
static inline IData VL_PACK_II(int obits, int lbits, const VlQueue<SData>& q) {
    constexpr size_t retBits = 8 * sizeof(IData);
    IData ret = 0;
    for (size_t i = 0; i < q.size() && i * lbits < retBits; ++i)
        ret |= static_cast<IData>(q.at(i)) << (i * lbits);
    return ret;
}

// Pack a queue of IData elements into a single IData.
// Element i is OR-ed in at bit offset i * lbits, so q.at(0) occupies the lowest bits.
// obits is not read by this overload.
// The queue length is a run-time value: elements whose offset would reach the width
// of IData are ignored, because shifting by that amount is undefined behavior.
static inline IData VL_PACK_II(int obits, int lbits, const VlQueue<IData>& q) {
    constexpr size_t retBits = 8 * sizeof(IData);
    IData ret = 0;
    for (size_t i = 0; i < q.size() && i * lbits < retBits; ++i) ret |= q.at(i) << (i * lbits);
    return ret;
}

template <typename T, std::size_t T_Depth>
static inline IData VL_PACK_II(int obits, int lbits, const VlUnpacked<T, T_Depth>& q) {
// Pack an unpacked array of CData into one IData.
// The array is walked from its last element backwards: q[T_Depth - 1] lands at
// bit 0 and each earlier element sits lbits higher. obits is not read here.
template <std::size_t T_Depth>
static inline IData VL_PACK_II(int obits, int lbits, const VlUnpacked<CData, T_Depth>& q) {
    IData packed = 0;
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const IData elem = static_cast<IData>(q[T_Depth - 1 - slot]);
        packed |= elem << (slot * lbits);
    }
    return packed;
}

// Pack an unpacked array of SData into one IData.
// The array is walked from its last element backwards: q[T_Depth - 1] lands at
// bit 0 and each earlier element sits lbits higher. obits is not read here.
template <std::size_t T_Depth>
static inline IData VL_PACK_II(int obits, int lbits, const VlUnpacked<SData, T_Depth>& q) {
    IData packed = 0;
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const IData elem = static_cast<IData>(q[T_Depth - 1 - slot]);
        packed |= elem << (slot * lbits);
    }
    return packed;
}

// Pack an unpacked array of IData into one IData.
// The array is walked from its last element backwards: q[T_Depth - 1] lands at
// bit 0 and each earlier element sits lbits higher. obits is not read here.
template <std::size_t T_Depth>
static inline IData VL_PACK_II(int obits, int lbits, const VlUnpacked<IData, T_Depth>& q) {
    IData packed = 0;
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const size_t shift = slot * lbits;
        packed |= q[T_Depth - 1 - slot] << shift;
    }
    return packed;
}

#define VL_PACK_QI VL_PACK_QQ
// Pack a queue of CData elements into a single QData.
// Element i is OR-ed in at bit offset i * lbits, so q.at(0) occupies the lowest bits.
// obits is not read by this overload.
// The queue length is a run-time value: elements whose offset would reach the width
// of QData are ignored, because shifting by that amount is undefined behavior.
static inline QData VL_PACK_QI(int obits, int lbits, const VlQueue<CData>& q) {
    constexpr size_t retBits = 8 * sizeof(QData);
    QData ret = 0;
    for (size_t i = 0; i < q.size() && i * lbits < retBits; ++i)
        ret |= static_cast<QData>(q.at(i)) << (i * lbits);
    return ret;
}

// Pack a queue of SData elements into a single QData.
// Element i is OR-ed in at bit offset i * lbits, so q.at(0) occupies the lowest bits.
// obits is not read by this overload.
// The queue length is a run-time value: elements whose offset would reach the width
// of QData are ignored, because shifting by that amount is undefined behavior.
static inline QData VL_PACK_QI(int obits, int lbits, const VlQueue<SData>& q) {
    constexpr size_t retBits = 8 * sizeof(QData);
    QData ret = 0;
    for (size_t i = 0; i < q.size() && i * lbits < retBits; ++i)
        ret |= static_cast<QData>(q.at(i)) << (i * lbits);
    return ret;
}

template <typename T>
static inline QData VL_PACK_QQ(int obits, int lbits, const VlQueue<T>& q) {
// Pack a queue of IData elements into a single QData.
// Each element is widened to QData before shifting; element i is OR-ed in at bit
// offset i * lbits, so q.at(0) occupies the lowest bits.
// obits is not read by this overload.
// The queue length is a run-time value: elements whose offset would reach the width
// of QData are ignored, because shifting by that amount is undefined behavior.
static inline QData VL_PACK_QI(int obits, int lbits, const VlQueue<IData>& q) {
    constexpr size_t retBits = 8 * sizeof(QData);
    QData ret = 0;
    for (size_t i = 0; i < q.size() && i * lbits < retBits; ++i)
        ret |= static_cast<QData>(q.at(i)) << (i * lbits);
    return ret;
}

template <typename T, std::size_t T_Depth>
static inline QData VL_PACK_QQ(int obits, int lbits, const VlUnpacked<T, T_Depth>& q) {
// Pack an unpacked array of CData into one QData.
// Elements are widened to QData and taken from the back of the array first:
// q[T_Depth - 1] lands at bit 0, each earlier element lbits higher.
// obits is not read here.
template <std::size_t T_Depth>
static inline QData VL_PACK_QI(int obits, int lbits, const VlUnpacked<CData, T_Depth>& q) {
    QData packed = 0;
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const QData elem = static_cast<QData>(q[T_Depth - 1 - slot]);
        packed |= elem << (slot * lbits);
    }
    return packed;
}

template <typename T>
static inline WDataOutP VL_PACK_WI(int obits, int lbits, WDataOutP owp, const VlQueue<T>& q) {
// Pack an unpacked array of SData into one QData.
// Elements are widened to QData and taken from the back of the array first:
// q[T_Depth - 1] lands at bit 0, each earlier element lbits higher.
// obits is not read here.
template <std::size_t T_Depth>
static inline QData VL_PACK_QI(int obits, int lbits, const VlUnpacked<SData, T_Depth>& q) {
    QData packed = 0;
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const QData elem = static_cast<QData>(q[T_Depth - 1 - slot]);
        packed |= elem << (slot * lbits);
    }
    return packed;
}

// Pack an unpacked array of IData into one QData.
// Elements are widened to QData and taken from the back of the array first:
// q[T_Depth - 1] lands at bit 0, each earlier element lbits higher.
// obits is not read here.
template <std::size_t T_Depth>
static inline QData VL_PACK_QI(int obits, int lbits, const VlUnpacked<IData, T_Depth>& q) {
    QData packed = 0;
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const QData elem = static_cast<QData>(q[T_Depth - 1 - slot]);
        packed |= elem << (slot * lbits);
    }
    return packed;
}

// Pack a queue of QData elements into a single QData.
// Element i is OR-ed in at bit offset i * lbits, so q.at(0) occupies the lowest bits.
// obits is not read by this overload.
// The queue length is a run-time value: elements whose offset would reach the width
// of QData are ignored, because shifting by that amount is undefined behavior.
static inline QData VL_PACK_QQ(int obits, int lbits, const VlQueue<QData>& q) {
    constexpr size_t retBits = 8 * sizeof(QData);
    QData ret = 0;
    for (size_t i = 0; i < q.size() && i * lbits < retBits; ++i) ret |= q.at(i) << (i * lbits);
    return ret;
}

// Pack an unpacked array of QData into one QData.
// q[T_Depth - 1] lands at bit 0 and each earlier element sits lbits higher.
// obits is not read here.
template <std::size_t T_Depth>
static inline QData VL_PACK_QQ(int obits, int lbits, const VlUnpacked<QData, T_Depth>& q) {
    QData packed = 0;
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const size_t shift = slot * lbits;
        packed |= q[T_Depth - 1 - slot] << shift;
    }
    return packed;
}

// Pack a queue of CData elements into the wide value at owp and return owp.
// Words 1 and above of owp are zeroed first (VL_WORDS_I(obits) - 1 words); word 0
// is not zeroed here. Element i is then inserted at bits
// [i * lbits + lbits - 1 : i * lbits], so q.at(0) occupies the lowest bits.
// NOTE(review): nothing here bounds q.size() against obits - confirm callers do.
static inline WDataOutP VL_PACK_WI(int obits, int lbits, WDataOutP owp, const VlQueue<CData>& q) {
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    for (size_t idx = 0; idx < q.size(); ++idx) {
        const size_t lsb = idx * lbits;
        _vl_insert_WI(owp, q.at(idx), lsb + lbits - 1, lsb);
    }
    return owp;
}

template <typename T, std::size_t T_Depth>
// Pack a queue of SData elements into the wide value at owp and return owp.
// Words 1 and above of owp are zeroed first (VL_WORDS_I(obits) - 1 words); word 0
// is not zeroed here. Element i is then inserted at bits
// [i * lbits + lbits - 1 : i * lbits], so q.at(0) occupies the lowest bits.
// NOTE(review): nothing here bounds q.size() against obits - confirm callers do.
static inline WDataOutP VL_PACK_WI(int obits, int lbits, WDataOutP owp, const VlQueue<SData>& q) {
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    for (size_t idx = 0; idx < q.size(); ++idx) {
        const size_t lsb = idx * lbits;
        _vl_insert_WI(owp, q.at(idx), lsb + lbits - 1, lsb);
    }
    return owp;
}

// Pack a queue of IData elements into the wide value at owp and return owp.
// Words 1 and above of owp are zeroed first (VL_WORDS_I(obits) - 1 words); word 0
// is not zeroed here. Element i is then inserted at bits
// [i * lbits + lbits - 1 : i * lbits], so q.at(0) occupies the lowest bits.
// NOTE(review): nothing here bounds q.size() against obits - confirm callers do.
static inline WDataOutP VL_PACK_WI(int obits, int lbits, WDataOutP owp, const VlQueue<IData>& q) {
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    for (size_t idx = 0; idx < q.size(); ++idx) {
        const size_t lsb = idx * lbits;
        _vl_insert_WI(owp, q.at(idx), lsb + lbits - 1, lsb);
    }
    return owp;
}

// Pack an unpacked array of CData into the wide value at owp and return owp.
// Words 1 and above of owp are zeroed first (VL_WORDS_I(obits) - 1 words); word 0
// is not zeroed here. The array is consumed back to front: q[T_Depth - 1] is
// inserted at bit 0 and each earlier element lbits higher.
template <std::size_t T_Depth>
static inline WDataOutP VL_PACK_WI(int obits, int lbits, WDataOutP owp,
                                   const VlUnpacked<CData, T_Depth>& q) {
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const size_t lsb = slot * lbits;
        _vl_insert_WI(owp, q[T_Depth - 1 - slot], lsb + lbits - 1, lsb);
    }
    return owp;
}

// Pack an unpacked array of SData into the wide value at owp and return owp.
// Words 1 and above of owp are zeroed first (VL_WORDS_I(obits) - 1 words); word 0
// is not zeroed here. The array is consumed back to front: q[T_Depth - 1] is
// inserted at bit 0 and each earlier element lbits higher.
template <std::size_t T_Depth>
static inline WDataOutP VL_PACK_WI(int obits, int lbits, WDataOutP owp,
                                   const VlUnpacked<SData, T_Depth>& q) {
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const size_t lsb = slot * lbits;
        _vl_insert_WI(owp, q[T_Depth - 1 - slot], lsb + lbits - 1, lsb);
    }
    return owp;
}

template <std::size_t T_Depth>
static inline WDataOutP VL_PACK_WI(int obits, int lbits, WDataOutP owp,
const VlUnpacked<T, T_Depth>& q) {
const VlUnpacked<IData, T_Depth>& q) {
VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
for (size_t i = 0; i < T_Depth; ++i)
_vl_insert_WI(owp, q[T_Depth - 1 - i], i * lbits + lbits - 1, i * lbits);
return owp;
}

template <typename T>
static inline WDataOutP VL_PACK_WQ(int obits, int lbits, WDataOutP owp, const VlQueue<T>& q) {
// Pack a queue of QData elements into the wide value at owp and return owp.
// Words 1 and above of owp are zeroed first (VL_WORDS_I(obits) - 1 words); word 0
// is not zeroed here. Element i is then inserted via _vl_insert_WQ at bits
// [i * lbits + lbits - 1 : i * lbits], so q.at(0) occupies the lowest bits.
// NOTE(review): nothing here bounds q.size() against obits - confirm callers do.
static inline WDataOutP VL_PACK_WQ(int obits, int lbits, WDataOutP owp, const VlQueue<QData>& q) {
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    for (size_t idx = 0; idx < q.size(); ++idx) {
        const size_t lsb = idx * lbits;
        _vl_insert_WQ(owp, q.at(idx), lsb + lbits - 1, lsb);
    }
    return owp;
}

template <typename T, std::size_t T_Depth>
template <std::size_t T_Depth>
static inline WDataOutP VL_PACK_WQ(int obits, int lbits, WDataOutP owp,
const VlUnpacked<T, T_Depth>& q) {
const VlUnpacked<QData, T_Depth>& q) {
VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
for (size_t i = 0; i < T_Depth; ++i)
_vl_insert_WQ(owp, q[T_Depth - 1 - i], i * lbits + lbits - 1, i * lbits);
return owp;
}

// Pack a queue of wide (VlWide<N>) elements into the wide value at owp and return owp.
// Words 1 and above of owp are zeroed first (VL_WORDS_I(obits) - 1 words); word 0
// is not zeroed here. Element i is then inserted via _vl_insert_WW at bits
// [i * lbits + lbits - 1 : i * lbits], so q.at(0) occupies the lowest bits.
// NOTE(review): nothing here bounds q.size() against obits - confirm callers do.
template <std::size_t N>
static inline WDataOutP VL_PACK_WW(int obits, int lbits, WDataOutP owp,
                                   const VlQueue<VlWide<N>>& q) {
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    for (size_t idx = 0; idx < q.size(); ++idx) {
        const size_t lsb = idx * lbits;
        _vl_insert_WW(owp, q.at(idx), lsb + lbits - 1, lsb);
    }
    return owp;
}

// Pack an unpacked array of wide (VlWide<N>) elements into the wide value at owp
// and return owp. Words 1 and above of owp are zeroed first
// (VL_WORDS_I(obits) - 1 words); word 0 is not zeroed here. The array is consumed
// back to front: q[T_Depth - 1] is inserted at bit 0, each earlier element lbits higher.
template <std::size_t T_Depth, std::size_t N>
static inline WDataOutP VL_PACK_WW(int obits, int lbits, WDataOutP owp,
                                   const VlUnpacked<VlWide<N>, T_Depth>& q) {
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    for (size_t slot = 0; slot < T_Depth; ++slot) {
        const size_t lsb = slot * lbits;
        _vl_insert_WW(owp, q[T_Depth - 1 - slot], lsb + lbits - 1, lsb);
    }
    return owp;
}

// Because concats are common and wide, it's valuable to always have a clean output.
// Thus we specify inputs must be clean, so we don't need to clean the output.
// Note the bit shifts are always constants, so the adds in these constify out.
Expand Down Expand Up @@ -2089,68 +2202,169 @@ static inline WDataOutP VL_SEL_WWII(int obits, int lbits, WDataOutP owp, WDataIn
//======================================================================
// Expressions needing insert/select

template <typename T>
static inline void VL_UNPACK_II(int lbits, int rbits, VlQueue<T>& q, IData from) {
static inline void VL_UNPACK_II(int lbits, int rbits, VlQueue<CData>& q, IData from) {
const size_t size = (rbits + lbits - 1) / lbits;
q.renew(size);
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < size; ++i) q.at(i) = static_cast<T>((from >> (i * lbits)) & mask);
for (size_t i = 0; i < size; ++i) q.at(i) = (from >> (i * lbits)) & mask;
}

#define VL_UNPACK_IQ VL_UNPACK_QQ
static inline void VL_UNPACK_II(int lbits, int rbits, VlQueue<SData>& q, IData from) {
const size_t size = (rbits + lbits - 1) / lbits;
q.renew(size);
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < size; ++i) q.at(i) = (from >> (i * lbits)) & mask;
}

static inline void VL_UNPACK_II(int lbits, int rbits, VlQueue<IData>& q, IData from) {
const size_t size = (rbits + lbits - 1) / lbits;
q.renew(size);
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < size; ++i) q.at(i) = (from >> (i * lbits)) & mask;
}

template <typename T>
static inline void VL_UNPACK_QQ(int lbits, int rbits, VlQueue<T>& q, QData from) {
static inline void VL_UNPACK_IQ(int lbits, int rbits, VlQueue<CData>& q, QData from) {
const size_t size = (rbits + lbits - 1) / lbits;
q.renew(size);
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < size; ++i) q.at(i) = (from >> (i * lbits)) & mask;
}

static inline void VL_UNPACK_IQ(int lbits, int rbits, VlQueue<SData>& q, QData from) {
const size_t size = (rbits + lbits - 1) / lbits;
q.renew(size);
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < size; ++i) q.at(i) = (from >> (i * lbits)) & mask;
}

static inline void VL_UNPACK_IQ(int lbits, int rbits, VlQueue<IData>& q, QData from) {
const size_t size = (rbits + lbits - 1) / lbits;
q.renew(size);
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < size; ++i) q.at(i) = (from >> (i * lbits)) & mask;
}

static inline void VL_UNPACK_QQ(int lbits, int rbits, VlQueue<QData>& q, QData from) {
const size_t size = (rbits + lbits - 1) / lbits;
q.renew(size);
const QData mask = VL_MASK_Q(lbits);
for (size_t i = 0; i < size; ++i) q.at(i) = static_cast<T>((from >> (i * lbits)) & mask);
for (size_t i = 0; i < size; ++i) q.at(i) = (from >> (i * lbits)) & mask;
}

// Split the wide value at rwp into lbits-wide slices and store them in a queue of CData.
// The queue is renewed to (rbits + lbits - 1) / lbits entries (rbits / lbits
// rounded up); entry i is VL_SEL_IWII(rbits, rwp, i * lbits, lbits) masked with
// VL_MASK_I(lbits).
// The element count is held as size_t, like VL_UNPACK_II, so the loop bound and
// the size_t index are compared without a signed/unsigned mix.
static inline void VL_UNPACK_IW(int lbits, int rbits, VlQueue<CData>& q, WDataInP rwp) {
    const size_t size = (rbits + lbits - 1) / lbits;
    q.renew(size);
    const IData mask = VL_MASK_I(lbits);
    for (size_t i = 0; i < size; ++i) q.at(i) = VL_SEL_IWII(rbits, rwp, i * lbits, lbits) & mask;
}

// Split the wide value at rwp into lbits-wide slices and store them in a queue of SData.
// The queue is renewed to (rbits + lbits - 1) / lbits entries (rbits / lbits
// rounded up); entry i is VL_SEL_IWII(rbits, rwp, i * lbits, lbits) masked with
// VL_MASK_I(lbits).
// The element count is held as size_t, like VL_UNPACK_II, so the loop bound and
// the size_t index are compared without a signed/unsigned mix.
static inline void VL_UNPACK_IW(int lbits, int rbits, VlQueue<SData>& q, WDataInP rwp) {
    const size_t size = (rbits + lbits - 1) / lbits;
    q.renew(size);
    const IData mask = VL_MASK_I(lbits);
    for (size_t i = 0; i < size; ++i) q.at(i) = VL_SEL_IWII(rbits, rwp, i * lbits, lbits) & mask;
}

template <typename T>
static inline void VL_UNPACK_IW(int lbits, int rbits, VlQueue<T>& q, WDataInP rwp) {
static inline void VL_UNPACK_IW(int lbits, int rbits, VlQueue<IData>& q, WDataInP rwp) {
const int size = (rbits + lbits - 1) / lbits;
q.renew(size);
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < size; ++i)
q.at(i) = static_cast<T>(VL_SEL_IWII(rbits, rwp, i * lbits, lbits) & mask);
for (size_t i = 0; i < size; ++i) q.at(i) = VL_SEL_IWII(rbits, rwp, i * lbits, lbits) & mask;
}

template <typename T>
static inline void VL_UNPACK_QW(int lbits, int rbits, VlQueue<T>& q, WDataInP rwp) {
static inline void VL_UNPACK_QW(int lbits, int rbits, VlQueue<QData>& q, WDataInP rwp) {
const int size = (rbits + lbits - 1) / lbits;
q.renew(size);
const QData mask = VL_MASK_Q(lbits);
for (size_t i = 0; i < size; ++i)
q.at(i) = static_cast<T>(VL_SEL_QWII(rbits, rwp, i * lbits, lbits) & mask);
for (size_t i = 0; i < size; ++i) q.at(i) = VL_SEL_QWII(rbits, rwp, i * lbits, lbits) & mask;
}

template <typename T, std::size_t T_Depth>
static inline void VL_UNPACK_II(int lbits, int rbits, VlUnpacked<T, T_Depth>& q, QData from) {
// Split the wide value at rwp into lbits-wide slices and store them in a queue of
// wide (VlWide<N>) elements.
// The queue is renewed to (rbits + lbits - 1) / lbits entries (rbits / lbits
// rounded up); entry i is filled by VL_SEL_WWII with the slice starting at bit i * lbits.
// The element count is held as size_t, like VL_UNPACK_II, so the loop bound and
// the size_t index are compared without a signed/unsigned mix.
template <std::size_t N>
static inline void VL_UNPACK_WW(int lbits, int rbits, VlQueue<VlWide<N>>& q, WDataInP rwp) {
    const size_t size = (rbits + lbits - 1) / lbits;
    q.renew(size);
    for (size_t i = 0; i < size; ++i) VL_SEL_WWII(lbits, rbits, q.at(i), rwp, i * lbits, lbits);
}

template <std::size_t T_Depth>
static inline void VL_UNPACK_II(int lbits, int rbits, VlUnpacked<CData, T_Depth>& q, IData from) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i)
q[i] = static_cast<T>((from >> ((T_Depth - 1 - i) * lbits)) & mask);
for (size_t i = 0; i < T_Depth; ++i) q[i] = (from >> ((T_Depth - 1 - i) * lbits)) & mask;
}

template <std::size_t T_Depth>
static inline void VL_UNPACK_II(int lbits, int rbits, VlUnpacked<SData, T_Depth>& q, IData from) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i) q[i] = (from >> ((T_Depth - 1 - i) * lbits)) & mask;
}

template <typename T, std::size_t T_Depth>
static inline void VL_UNPACK_QQ(int lbits, int rbits, VlUnpacked<T, T_Depth>& q, QData from) {
template <std::size_t T_Depth>
static inline void VL_UNPACK_II(int lbits, int rbits, VlUnpacked<IData, T_Depth>& q, IData from) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i) q[i] = (from >> ((T_Depth - 1 - i) * lbits)) & mask;
}

template <std::size_t T_Depth>
static inline void VL_UNPACK_IQ(int lbits, int rbits, VlUnpacked<CData, T_Depth>& q, QData from) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i) q[i] = (from >> ((T_Depth - 1 - i) * lbits)) & mask;
}

template <std::size_t T_Depth>
static inline void VL_UNPACK_IQ(int lbits, int rbits, VlUnpacked<SData, T_Depth>& q, QData from) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i) q[i] = (from >> ((T_Depth - 1 - i) * lbits)) & mask;
}

template <std::size_t T_Depth>
static inline void VL_UNPACK_IQ(int lbits, int rbits, VlUnpacked<IData, T_Depth>& q, QData from) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i) q[i] = (from >> ((T_Depth - 1 - i) * lbits)) & mask;
}

template <std::size_t T_Depth>
static inline void VL_UNPACK_QQ(int lbits, int rbits, VlUnpacked<QData, T_Depth>& q, QData from) {
const QData mask = VL_MASK_Q(lbits);
for (size_t i = 0; i < T_Depth; ++i) q[i] = (from >> ((T_Depth - 1 - i) * lbits)) & mask;
}

template <std::size_t T_Depth>
static inline void VL_UNPACK_IW(int lbits, int rbits, VlUnpacked<CData, T_Depth>& q,
WDataInP rwp) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i)
q[i] = static_cast<T>((from >> ((T_Depth - 1 - i) * lbits)) & mask);
q[i] = VL_SEL_IWII(rbits, rwp, (T_Depth - 1 - i) * lbits, lbits) & mask;
}

template <typename T, std::size_t T_Depth>
static inline void VL_UNPACK_IW(int lbits, int rbits, VlUnpacked<T, T_Depth>& q, WDataInP rwp) {
template <std::size_t T_Depth>
static inline void VL_UNPACK_IW(int lbits, int rbits, VlUnpacked<SData, T_Depth>& q,
WDataInP rwp) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i)
q[i] = static_cast<T>(VL_SEL_IWII(rbits, rwp, (T_Depth - 1 - i) * lbits, lbits) & mask);
q[i] = VL_SEL_IWII(rbits, rwp, (T_Depth - 1 - i) * lbits, lbits) & mask;
}

template <typename T, std::size_t T_Depth>
static inline void VL_UNPACK_QW(int lbits, int rbits, VlUnpacked<T, T_Depth>& q, WDataInP rwp) {
template <std::size_t T_Depth>
static inline void VL_UNPACK_IW(int lbits, int rbits, VlUnpacked<IData, T_Depth>& q,
WDataInP rwp) {
const IData mask = VL_MASK_I(lbits);
for (size_t i = 0; i < T_Depth; ++i)
q[i] = VL_SEL_IWII(rbits, rwp, (T_Depth - 1 - i) * lbits, lbits) & mask;
}

template <std::size_t T_Depth>
static inline void VL_UNPACK_QW(int lbits, int rbits, VlUnpacked<QData, T_Depth>& q,
WDataInP rwp) {
const QData mask = VL_MASK_Q(lbits);
for (size_t i = 0; i < T_Depth; ++i)
q[i] = static_cast<T>(VL_SEL_QWII(rbits, rwp, (T_Depth - 1 - i) * lbits, lbits) & mask);
q[i] = VL_SEL_QWII(rbits, rwp, (T_Depth - 1 - i) * lbits, lbits) & mask;
}

// Split the wide value at rwp into lbits-wide slices and store them in an
// unpacked array of wide (VlWide<N>) elements. q[i] is filled by VL_SEL_WWII with
// the slice starting at bit (T_Depth - 1 - i) * lbits, so the last array element
// takes the lowest bits.
template <std::size_t T_Depth, std::size_t N>
static inline void VL_UNPACK_WW(int lbits, int rbits, VlUnpacked<VlWide<N>, T_Depth>& q,
                                WDataInP rwp) {
    for (size_t idx = 0; idx < T_Depth; ++idx) {
        const size_t lsb = (T_Depth - 1 - idx) * lbits;
        VL_SEL_WWII(lbits, rbits, q[idx], rwp, lsb, lbits);
    }
}

// Return QData from double (numeric)
Expand Down
Loading

0 comments on commit e6fe367

Please sign in to comment.