1818#include < rp.hpp>
1919#include < ds.hpp>
2020#include < vk/format_utils.h>
21+ #include < numeric>
2122
2223namespace vil {
2324
@@ -429,20 +430,32 @@ void CommandHookRecord::hookRecordAfterDst(Command& dst, RecordInfo& info) {
429430}
430431
431432u32 CommandHookRecord::vertexCountHint (const DrawCmdBase& bcmd, const RecordInfo& info) const {
433+ // NOTE: for xfb hints, we always use vertexCountHint
434+
432435 if (auto * dcmd = commandCast<const DrawCmd*>(&bcmd); dcmd) {
433436 return dcmd->vertexCount ;
434437 } else if (auto * dcmd = commandCast<const DrawIndexedCmd*>(&bcmd); dcmd) {
435438 return dcmd->indexCount ;
436439 } else if (auto * dcmd = commandCast<const DrawIndirectCmd*>(&bcmd); dcmd) {
437- return dcmd-> isIndexed () ? info. hints . indexCountHint : info.hints .vertexCountHint ;
440+ return info.hints .vertexCountHint ;
438441 } else if (auto * dcmd = commandCast<const DrawIndirectCountCmd*>(&bcmd); dcmd) {
439- return dcmd-> isIndexed () ? info. hints . indexCountHint : info.hints .vertexCountHint ;
442+ return info.hints .vertexCountHint ;
440443 } else if (auto * dcmd = commandCast<const DrawMultiCmd*>(&bcmd); dcmd) {
441- auto & drawInfo = dcmd->vertexInfos [info.ops .vertexInputCmd ];
442- return drawInfo.vertexCount ;
444+ auto cmds = dcmd->vertexInfos ;
445+ if (info.ops .vertexCmd != u32 (-1 )) {
446+ auto count = std::min (info.ops .vertexCmd + 1 , u32 (cmds.size ()));
447+ cmds = cmds.first (count);
448+ }
449+ return std::accumulate (cmds.begin (), cmds.end (), 0u ,
450+ [](auto & a, auto & b) { return a + b.vertexCount ; });
443451 } else if (auto * dcmd = commandCast<const DrawMultiIndexedCmd*>(&bcmd); dcmd) {
444- auto & drawInfo = dcmd->indexInfos [info.ops .vertexInputCmd ];
445- return drawInfo.indexCount ;
452+ auto cmds = dcmd->indexInfos ;
453+ if (info.ops .vertexCmd != u32 (-1 )) {
454+ auto count = std::min (info.ops .vertexCmd + 1 , u32 (cmds.size ()));
455+ cmds = cmds.first (count);
456+ }
457+ return std::accumulate (cmds.begin (), cmds.end (), 0u ,
458+ [](auto & a, auto & b) { return a + b.indexCount ; });
446459 } else {
447460 dlg_fatal (" Unsupported draw command" );
448461 return 0u ;
@@ -473,12 +486,17 @@ void CommandHookRecord::hookRecordDst(Command& cmd, RecordInfo& info) {
473486 vertexCount = 1 * 1024 * 1024 ;
474487 }
475488
489+ // TODO: for large captures, allow only capturing position
490+ auto memType = info.ops .vertexCmd == u32 (-1 ) ?
491+ OwnBuffer::Type::deviceLocal : OwnBuffer::Type::hostVisible;
492+
476493 const auto xfbSize = drawCmd->state ->pipe ->xfbPatch ->stride * vertexCount;
477494 const auto usage =
478495 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
479496 VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
480497 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
481- state->transformFeedback .ensure (dev, xfbSize, usage);
498+ state->transformFeedback .ensure (dev, xfbSize, usage,
499+ {}, {}, memType);
482500
483501 auto offset = VkDeviceSize (0u );
484502 dev.dispatch .CmdBindTransformFeedbackBuffersEXT (cb, 0u , 1u ,
@@ -1221,11 +1239,7 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
12211239 // later on so we have to care about queue families
12221240 auto queueFams = combineQueueFamilies ({{record->queueFamily , dev.gfxQueue ->family }});
12231241
1224- // See vertexCopy.md for more information on this beast
1225- // TODO:
1226- // - we currently assume that index buffer offsets are aligned
1227- // to allow binding them as storage buffers. Can't assume that!
1228- // - missing barriers!
1242+ // See vertexCopy.md for more (possibly outdated) information on this beast
12291243
12301244 using Metadata = VertexCopyMetadata;
12311245
@@ -1251,7 +1265,10 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
12511265
12521266 auto stage0 = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
12531267 dev.dispatch .CmdWriteTimestamp (cb, stage0, this ->queryPool , timeStampID);
1254- dlg_trace (" vertexCopy timeStamp {}: {}" , timeStampID, name);
1268+
1269+ if (dev.printVertexCaptureTimings ) {
1270+ dlg_trace (" vertexCopy timeStamp {}: {}" , timeStampID, name);
1271+ }
12551272
12561273 ownTimingNames.push_back (name);
12571274
@@ -1395,7 +1412,6 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
13951412
13961413 u32 vertexHint = info.hints .vertexCountHint ;
13971414 if (vertexHint == 0u ) {
1398- vertexHint = fallbackVertexCountHint;
13991415 vertexHint = std::max (vertexHint, numIndices); // for indexed drawing
14001416 }
14011417
@@ -1428,13 +1444,15 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
14281444 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
14291445
14301446 // write copyType and other meta information
1431- auto wbuf = state->indexBufCopy .writeData ();
14321447 Metadata md {};
1433- md.copyTypeOrIndexOffset = copyTypeVertices;
1448+ md.firstIndex = copyTypeVertices;
14341449 md.firstVertex = params.firstVertex ;
14351450 md.firstInstance = params.firstInstance ;
1436- nytl::write (wbuf, md);
1437- state->indexBufCopy .flushMap ();
1451+ dev.dispatch .CmdUpdateBuffer (cb, state->indexBufCopy .buf , 0u ,
1452+ sizeof (md), &md);
1453+ vku::cmdBarrier (dev, cb, state->indexBufCopy .asSpan (),
1454+ vku::SyncScope::transferWrite (),
1455+ vku::SyncScope::computeReadWrite ());
14381456
14391457 copyVertexBufs (false , params.vertexCount , params.instanceCount , VK_INDEX_TYPE_NONE_KHR);
14401458 };
@@ -1448,6 +1466,8 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
14481466 hook.processIndices32_ :
14491467 hook.processIndices16_ ;
14501468
1469+ dlg_assert (cmd->state ->indices .offset % indSize == 0u );
1470+
14511471 auto & dst = state->indexBufCopy ;
14521472 const auto usage =
14531473 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
@@ -1457,9 +1477,9 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
14571477 usage, queueFams);
14581478
14591479 // init meta information
1460- auto wbuf = dst.writeData ();
14611480 Metadata md {};
1462- md.copyTypeOrIndexOffset = params.firstIndex ; // indexOffset
1481+ md.firstIndex = params.firstIndex ; // indexOffset
1482+ md.indexBufOffset = cmd->state ->indices .offset / indSize;
14631483 md.minIndex = params.indexCount > 0 ? 0xFFFFFFFFu : 0u ;
14641484 md.maxIndex = 0u ;
14651485 md.indexCount = params.indexCount ;
@@ -1470,23 +1490,27 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
14701490 md.dispatchPerVertexZ = 1u ;
14711491 md.firstInstance = params.firstInstance ;
14721492 md.firstVertex = params.vertexOffset ;
1473- nytl::write (wbuf, md);
1474- dst.flushMap ();
1493+ dev.dispatch .CmdUpdateBuffer (cb, dst.buf , 0u ,
1494+ sizeof (md), &md);
1495+ vku::cmdBarrier (dev, cb, state->indexBufCopy .asSpan (),
1496+ vku::SyncScope::transferWrite (),
1497+ vku::SyncScope::computeReadWrite ());
14751498
14761499 auto & ds = allocDs (indexPipe);
14771500 {
14781501 vku::DescriptorUpdate dsu (ds);
1479- // TODO: respect storage buf alignment
1480- dsu (vku::BufferSpan{cmd->state ->indices .buffer ->handle ,
1481- {cmd->state ->indices .offset , VK_WHOLE_SIZE}});
1502+ dsu (vku::BufferSpan{cmd->state ->indices .buffer ->handle , {0u , VK_WHOLE_SIZE}});
14821503 dsu (vku::BufferSpan{dst.buf , {0u , VK_WHOLE_SIZE}});
14831504 }
14841505
1506+ constexpr auto indicesPerInvoc = 8u ; // see vertexCopy.glsl
1507+ const auto groupsX = ceilDivide (params.indexCount , 64u * indicesPerInvoc);
1508+
14851509 dev.dispatch .CmdBindPipeline (cb, VK_PIPELINE_BIND_POINT_COMPUTE,
14861510 indexPipe.pipe ());
14871511 dev.dispatch .CmdBindDescriptorSets (cb, VK_PIPELINE_BIND_POINT_COMPUTE,
14881512 indexPipe.pipeLayout ().vkHandle (), 0u , 1u , &ds.vkHandle (), 0u , nullptr );
1489- dev.dispatch .CmdDispatch (cb, ceilDivide (params. indexCount , 64u ) , 1u , 1u );
1513+ dev.dispatch .CmdDispatch (cb, groupsX , 1u , 1u );
14901514 cmdBarrierCompute (dev, cb, state->indexBufCopy );
14911515
14921516 cmdTimestamp (" after_processIndices" );
@@ -1527,7 +1551,13 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
15271551 dev.dispatch .CmdBindDescriptorSets (cb, VK_PIPELINE_BIND_POINT_COMPUTE,
15281552 cmdPipe.pipeLayout ().vkHandle (), 0u , 1u , &ds.vkHandle (), 0u , nullptr );
15291553 dlg_assert (cmdStride % 4u == 0u );
1530- u32 pcr[] = {cmdBufOffsetShader, countBufOffset, info.ops .vertexInputCmd , cmdStride / 4u };
1554+
1555+ u32 pcr[] = {
1556+ cmdBufOffsetShader,
1557+ countBufOffset,
1558+ info.ops .vertexCmd ,
1559+ cmdStride / 4u
1560+ };
15311561 dev.dispatch .CmdPushConstants (cb, cmdPipe.pipeLayout ().vkHandle (),
15321562 VK_SHADER_STAGE_COMPUTE_BIT, 0u , sizeof (pcr), pcr);
15331563 dev.dispatch .CmdDispatch (cb, 1u , 1u , 1u );
@@ -1552,7 +1582,8 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
15521582 numIndices = fallbackIndexCountHint;
15531583 }
15541584
1555- state->indexBufCopy .ensure (dev, sizeof (Metadata) + numIndices * indexSize (indexType),
1585+ const auto indSize = indexSize (indexType);
1586+ state->indexBufCopy .ensure (dev, sizeof (Metadata) + numIndices * indSize,
15561587 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
15571588 VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
15581589 VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT);
@@ -1579,17 +1610,26 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
15791610 dsu (countBufShader);
15801611 }
15811612
1582- dlg_trace (" countBufOffset: {}" , countBufOffset);
1583- dlg_trace (" drawID: {}" , info.ops .vertexInputCmd );
1613+ // dlg_trace("countBufOffset: {}", countBufOffset);
1614+ // dlg_trace("drawID: {}", info.ops.vertexCmd );
15841615
15851616 dsu.apply ();
15861617
15871618 dev.dispatch .CmdBindPipeline (cb, VK_PIPELINE_BIND_POINT_COMPUTE,
15881619 cmdPipe.pipe ());
15891620 dev.dispatch .CmdBindDescriptorSets (cb, VK_PIPELINE_BIND_POINT_COMPUTE,
15901621 cmdPipe.pipeLayout ().vkHandle (), 0u , 1u , &ds.vkHandle (), 0u , nullptr );
1622+
15911623 dlg_assert (cmdStride % 4u == 0u );
1592- u32 pcr[] = {cmdBufOffsetShader, countBufOffset, info.ops .vertexInputCmd , cmdStride / 4u };
1624+ dlg_assert (cmd->state ->indices .offset % indSize == 0u );
1625+
1626+ u32 pcr[] = {
1627+ cmdBufOffsetShader,
1628+ countBufOffset,
1629+ info.ops .vertexCmd ,
1630+ cmdStride / 4u ,
1631+ u32 (cmd->state ->indices .offset / indSize),
1632+ };
15931633 dev.dispatch .CmdPushConstants (cb, cmdPipe.pipeLayout ().vkHandle (),
15941634 VK_SHADER_STAGE_COMPUTE_BIT, 0u , sizeof (pcr), pcr);
15951635 dev.dispatch .CmdDispatch (cb, 1u , 1u , 1u );
@@ -1608,8 +1648,7 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
16081648 vku::DescriptorUpdate dsu (ds);
16091649
16101650 auto & inds = cmd->state ->indices ;
1611- // TODO: respect storage buffer alignment
1612- dsu (vku::BufferSpan{inds.buffer ->handle , {inds.offset , VK_WHOLE_SIZE}});
1651+ dsu (vku::BufferSpan{inds.buffer ->handle , {0u , VK_WHOLE_SIZE}});
16131652 dsu (state->indexBufCopy .asSpan ());
16141653 dsu.apply ();
16151654
@@ -1665,10 +1704,10 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
16651704 } else if (auto * dcmd = commandCast<DrawMultiCmd*>(&bcmd); dcmd) {
16661705 cmd = dcmd;
16671706
1668- dlg_assertm_or (info.ops .vertexInputCmd < dcmd->vertexInfos .size (), return ,
1707+ dlg_assertm_or (info.ops .vertexCmd < dcmd->vertexInfos .size (), return ,
16691708 " Command to copy out-of-range" );
16701709
1671- auto & drawInfo = dcmd->vertexInfos [info.ops .vertexInputCmd ];
1710+ auto & drawInfo = dcmd->vertexInfos [info.ops .vertexCmd ];
16721711
16731712 VkDrawIndirectCommand params;
16741713 params.vertexCount = drawInfo.vertexCount ;
@@ -1680,10 +1719,10 @@ void CommandHookRecord::copyVertexInput(Command& bcmd, RecordInfo& info) {
16801719 } else if (auto * dcmd = commandCast<DrawMultiIndexedCmd*>(&bcmd); dcmd) {
16811720 cmd = dcmd;
16821721
1683- dlg_assertm_or (info.ops .vertexInputCmd < dcmd->indexInfos .size (), return ,
1722+ dlg_assertm_or (info.ops .vertexCmd < dcmd->indexInfos .size (), return ,
16841723 " Command to copy out-of-range" );
16851724
1686- auto & drawInfo = dcmd->indexInfos [info.ops .vertexInputCmd ];
1725+ auto & drawInfo = dcmd->indexInfos [info.ops .vertexCmd ];
16871726
16881727 VkDrawIndexedIndirectCommand params;
16891728 params.indexCount = drawInfo.indexCount ;
0 commit comments