#include <gui/gui.hpp>
#include <gui/util.hpp>
#include <gui/render.hpp>
#include <gui/resources.hpp>
#include <gui/cb.hpp>
#include <gui/fonts.hpp>
#include <gui/fontAwesome.hpp>
#include <commandHook/hook.hpp>
#include <commandHook/record.hpp>
#include <layer.hpp>
#include <stats.hpp>
#include <queue.hpp>
#include <handle.hpp>
#include <data.hpp>
#include <wrap.hpp>
#include <handles.hpp>
#include <command/commands.hpp>
#include <util/util.hpp>
#include <util/bytes.hpp>
#include <util/vecOps.hpp>
#include <util/profiling.hpp>

#include <vil_api.h>
#include <imgui/imgui.h>
#include <imgui/imgui_internal.h>
#include <vk/enumString.hpp>
#include <vk/format_utils.h>

#include <set>
#include <map>
#include <fstream>
#include <filesystem>

#ifdef VIL_DEBUG
	// NOTE: temporary debugging tool for the LMM algorithm
	// #define VIL_VIZ_LCS
#endif // VIL_DEBUG

#ifdef VIL_VIZ_LCS
	#include <gui/vizlcs.hpp>
#endif // VIL_VIZ_LCS

// Per-thread ImGui context pointer used by the layer. It lives in an
// inline namespace, so it can be referenced unqualified while its symbol
// still carries the 'imgui_vil' namespace.
// NOTE(review): presumably this is the variable the layer's imgui
// configuration maps the global imgui context (GImGui) to - confirm
// against the imgui config header.
inline namespace imgui_vil {

thread_local ImGuiContext* __LayerImGui;

}

namespace vil {

// from gui/pipes.cpp
// we factor pipeline creation out in a separate file to improve
// compile times when changing shaders or gui.cpp
// Called once from Gui::init with the gui render pass and the three
// pipeline layouts created there (gui rendering, image compute ops,
// histogram). Presumably fills 'dstPipes' with the created pipelines;
// the handles stored there are destroyed in ~Gui.
void initPipes(Device& dev,
	VkRenderPass rp, VkPipelineLayout renderPipeLayout,
	VkPipelineLayout compPipeLayout,
	VkPipelineLayout histogramPipeLayout,
	Gui::Pipelines& dstPipes);

// Gui
// Defaulted constructor. All device-dependent setup is done by Gui::init.
Gui::Gui() = default;

// Initializes the gui for the given device: creates the command pool,
// the descriptor set layouts, the pipeline layouts, the render pass and
// the pipelines, sets up imgui, the optional background blur and the
// tabs, and finally registers this gui at the device.
// - colorFormat, depthFormat: formats of the two render pass attachments.
// - clear: when true, the color attachment is cleared at render pass
//   begin. Otherwise its previous contents are loaded (from present-src
//   layout) and the gui is drawn on top.
void Gui::init(Device& dev, VkFormat colorFormat, VkFormat depthFormat, bool clear) {
	dev_ = &dev;
	clear_ = clear;
	lastFrame_ = Clock::now();

	// Command pool on the graphics queue family.
	VkCommandPoolCreateInfo cpci {};
	cpci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	cpci.queueFamilyIndex = dev.gfxQueue->family;
	cpci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
	VK_CHECK(dev.dispatch.CreateCommandPool(dev.handle, &cpci, nullptr, &commandPool_));
	nameHandle(dev, commandPool_, "Gui:commandPool");

	// Builds a layout binding holding exactly one descriptor.
	auto singleBinding = [](uint32_t slot, VkDescriptorType type,
			VkShaderStageFlags stages) {
		VkDescriptorSetLayoutBinding ret {};
		ret.binding = slot;
		ret.descriptorCount = 1u;
		ret.descriptorType = type;
		ret.stageFlags = stages;
		return ret;
	};

	VkDescriptorSetLayoutCreateInfo dslci {};
	dslci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;

	// Gui rendering ds layout: a single texture for the fragment shader.
	const VkDescriptorSetLayoutBinding binding = singleBinding(0u,
		VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
		VK_SHADER_STAGE_FRAGMENT_BIT);
	dslci.bindingCount = 1u;
	dslci.pBindings = &binding;
	VK_CHECK(dev.dispatch.CreateDescriptorSetLayout(dev.handle, &dslci, nullptr, &dsLayout_));
	nameHandle(dev, dsLayout_, "Gui:dsLayout");

	// Image op compute ds layout: storage buffer + image that is sampled
	// with the device's nearest sampler (immutable).
	VkDescriptorSetLayoutBinding imgOpBindings[2] {
		singleBinding(0u, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
			VK_SHADER_STAGE_COMPUTE_BIT),
		singleBinding(1u, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
			VK_SHADER_STAGE_COMPUTE_BIT),
	};
	imgOpBindings[1].pImmutableSamplers = &dev.nearestSampler;

	dslci.bindingCount = 2u;
	dslci.pBindings = imgOpBindings;
	VK_CHECK(dev.dispatch.CreateDescriptorSetLayout(dev.handle, &dslci, nullptr, &imgOpDsLayout_));
	nameHandle(dev, imgOpDsLayout_, "Gui:imgOpDsLayout");

	// Histogram ds layout: two storage buffers, visible to the compute
	// and vertex stages.
	const VkShaderStageFlags histStages =
		VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT;
	VkDescriptorSetLayoutBinding histBindings[2] {
		singleBinding(0u, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, histStages),
		singleBinding(1u, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, histStages),
	};

	dslci.bindingCount = 2u;
	dslci.pBindings = histBindings;
	VK_CHECK(dev.dispatch.CreateDescriptorSetLayout(dev.handle, &dslci, nullptr, &histogramDsLayout_));
	nameHandle(dev, histogramDsLayout_, "Gui:histogramDsLayout");

	// Pipeline layouts.
	// We just allocate the full push constant range that all
	// implementations must support.
	// PERF: most pipelines don't need this much. Could create multiple
	// pipe layouts.
	VkPushConstantRange pcr {};
	pcr.offset = 0;
	pcr.size = 128; // needed e.g. for vertex viewer pipeline
	pcr.stageFlags = VK_SHADER_STAGE_VERTEX_BIT |
		VK_SHADER_STAGE_FRAGMENT_BIT |
		VK_SHADER_STAGE_COMPUTE_BIT;

	VkPipelineLayoutCreateInfo plci {};
	plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
	plci.pushConstantRangeCount = 1;
	plci.pPushConstantRanges = &pcr;
	plci.setLayoutCount = 1;

	// gui rendering pipe layout
	plci.pSetLayouts = &dsLayout_;
	VK_CHECK(dev.dispatch.CreatePipelineLayout(dev.handle, &plci, nullptr, &pipeLayout_));
	nameHandle(dev, pipeLayout_, "Gui:pipeLayout");

	// histogram ops pipe layout
	plci.pSetLayouts = &histogramDsLayout_;
	VK_CHECK(dev.dispatch.CreatePipelineLayout(dev.handle, &plci, nullptr, &histogramPipeLayout_));
	nameHandle(dev, histogramPipeLayout_, "Gui:histogramPipeLayout");

	// image compute pipe layout: push constants only for the compute stage
	pcr.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
	plci.pSetLayouts = &imgOpDsLayout_;
	VK_CHECK(dev.dispatch.CreatePipelineLayout(dev.handle, &plci, nullptr, &imgOpPipeLayout_));
	nameHandle(dev, imgOpPipeLayout_, "Gui:imgOpPipeLayout");

	// Render pass with one color and one depth attachment.
	VkAttachmentDescription atts[2] {};

	auto& colorAtt = atts[0];
	colorAtt.format = colorFormat;
	colorAtt.samples = VK_SAMPLE_COUNT_1_BIT;
	if(clear) {
		// previous contents are irrelevant
		colorAtt.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
		colorAtt.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	} else {
		// draw on top of what is already there
		colorAtt.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
		colorAtt.initialLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
	}
	colorAtt.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
	colorAtt.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
	colorAtt.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
	colorAtt.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

	auto& depthAtt = atts[1];
	depthAtt.format = depthFormat;
	depthAtt.samples = VK_SAMPLE_COUNT_1_BIT;
	depthAtt.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
	depthAtt.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
	depthAtt.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
	depthAtt.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
	depthAtt.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	// don't really care atm
	depthAtt.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

	VkAttachmentReference colorRef = {};
	colorRef.attachment = 0;
	colorRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

	VkAttachmentReference depthRef = {};
	depthRef.attachment = 1;
	depthRef.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

	VkSubpassDescription subpass = {};
	subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
	subpass.pColorAttachments = &colorRef;
	subpass.colorAttachmentCount = 1;
	subpass.pDepthStencilAttachment = &depthRef;

	VkRenderPassCreateInfo rpi {};
	rpi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
	rpi.pAttachments = atts;
	rpi.attachmentCount = 2;
	rpi.pSubpasses = &subpass;
	rpi.subpassCount = 1;

	VK_CHECK(dev.dispatch.CreateRenderPass(dev.handle, &rpi, nullptr, &rp_));
	nameHandle(dev, rp_, "Gui:rp");

	// Pipelines and imgui context.
	initPipes(dev, rp_, pipeLayout_, imgOpPipeLayout_, histogramPipeLayout_, pipes_);
	initImGui();

	// Background blur. Only makes sense when we draw on top of existing
	// contents; can be disabled via the VIL_BLUR environment variable.
	const bool blurBackground = checkEnvBinary("VIL_BLUR", true);
	if(blurBackground && !clear) {
		vil::init(blur_, dev);

		VkDescriptorSetAllocateInfo dai {};
		dai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
		dai.descriptorPool = dev.dsPool;
		dai.descriptorSetCount = 1u;
		dai.pSetLayouts = &dsLayout_;
		VK_CHECK(dev.dispatch.AllocateDescriptorSets(dev.handle, &dai, &blurDs_));
		nameHandle(dev, blurDs_, "Gui:blurDs");
	}

	// Tabs: first create both, then initialize both.
	// TODO: use RAII for init
	tabs_.resources = std::make_unique<ResourceGui>();
	tabs_.cb = std::make_unique<CommandBufferGui>();

	tabs_.resources->init(*this);
	tabs_.cb->init(*this);

	// Register at the device.
	// TODO: likely needs a lock. Shouldn't be done here in first place
	// i guess but at Gui creation, where an existing gui object could
	// be moved.
	dlg_assert(dev.gui == nullptr);
	dev.gui = this;
}

void Gui::initImGui() {
	// Init imgui
	this->imgui_ = ImGui::CreateContext();
	ImGui::SetCurrentContext(imgui_);
	this->io_ = &ImGui::GetIO();
	this->io_->IniFilename = nullptr;
	this->io_->MouseDrawCursor = false;

	// TODO: also add gamepad support
	// TODO: figure out how to make our custom selectables (using IsItemClicked) work
	this->io_->ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard;

	auto& io = *this->io_;
	io.KeyMap[ImGuiKey_A] = VilKeyA;
	io.KeyMap[ImGuiKey_C] = VilKeyC;
	io.KeyMap[ImGuiKey_V] = VilKeyV;
	io.KeyMap[ImGuiKey_X] = VilKeyX;
	io.KeyMap[ImGuiKey_Y] = VilKeyY;
	io.KeyMap[ImGuiKey_Z] = VilKeyZ;
	io.KeyMap[ImGuiKey_Enter] = VilKeyEnter;
	io.KeyMap[ImGuiKey_Delete] = VilKeyDelete;
	io.KeyMap[ImGuiKey_Space] = VilKeySpace;
	io.KeyMap[ImGuiKey_LeftArrow] = VilKeyLeft;
	io.KeyMap[ImGuiKey_DownArrow] = VilKeyDown;
	io.KeyMap[ImGuiKey_RightArrow] = VilKeyRight;
	io.KeyMap[ImGuiKey_UpArrow] = VilKeyUp;
	io.KeyMap[ImGuiKey_Escape] = VilKeyEscape;
	io.KeyMap[ImGuiKey_Tab] = VilKeyTab;
	io.KeyMap[ImGuiKey_Backspace] = VilKeyBackspace;

	static const ImWchar rangesBasic[] = {
		0x0020, 0x00FF, // Basic Latin + Latin Supplement
		0x03BC, 0x03BC, // micro
		0x03C3, 0x03C3, // small sigma
		0x2013, 0x2013, // en dash
		0x2264, 0x2264, // less-than or equal to
		0,
	};

    static const ImWchar rangesIcons[] = {
        ICON_MIN_FA, ICON_MAX_FA,
        0
    };

	ImFontConfig configOwned;
	configOwned.FontDataOwnedByAtlas = false;
    // configOwned.FontBuilderFlags = ImGuiFreeTypeBuilderFlags_LightHinting;

    ImFontConfig configMerge;
    configMerge.MergeMode = true;
    // configMerge.FontBuilderFlags = ImGuiFreeTypeBuilderFlags_LightHinting;

	defaultFont = io.Fonts->AddFontFromMemoryCompressedTTF(arimo_compressed_data,
		arimo_compressed_size, 15.f, nullptr, rangesBasic);
	io.Fonts->AddFontFromMemoryCompressedTTF(fontAwesomeSolid_compressed_data,
		fontAwesomeSolid_compressed_size, 14.f, &configMerge, rangesIcons);

	// TODO: compress
	monoFont = io.Fonts->AddFontFromMemoryTTF((void*) inconsolata_compressed_data,
		inconsolata_compressed_size, 15.f, &configOwned, rangesBasic);

	// Apply style
	ImGui::StyleColorsDark();
	auto& style = ImGui::GetStyle();

	auto accentHue = 0.f; // red

	auto setColorHSV = [&](ImGuiCol_ col, u8 h, u8 s, u8 v, u8 a) {
		float r, g, b;
		ImGui::ColorConvertHSVtoRGB(h / 255.f, s / 255.f, v / 255.f, r, g, b);
		style.Colors[col] = {r, g, b, a / 255.f};
	};

	// hsv + alpha. H will always be accentHue
	auto setAccentColor = [&](ImGuiCol_ col, u8 s, u8 v, u8 a) {
		setColorHSV(col, accentHue, s, v, a);
	};

	setAccentColor(ImGuiCol_CheckMark, 187, 250, 255);
	setAccentColor(ImGuiCol_SliderGrab, 185, 224, 255);
	setAccentColor(ImGuiCol_SliderGrabActive, 187, 250, 255);

	setAccentColor(ImGuiCol_Button, 187, 250, 102);
	setAccentColor(ImGuiCol_ButtonHovered, 187, 250, 255);
	setAccentColor(ImGuiCol_ButtonActive, 187, 239, 255);

	setAccentColor(ImGuiCol_Header, 187, 250, 79);
	setAccentColor(ImGuiCol_HeaderHovered, 187, 250, 204);
	setAccentColor(ImGuiCol_HeaderActive, 187, 250, 255);

	style.Colors[ImGuiCol_Separator].w = 0.1;
	setAccentColor(ImGuiCol_SeparatorHovered, 221, 191, 150);
	setAccentColor(ImGuiCol_SeparatorActive, 221, 191, 255);

	setAccentColor(ImGuiCol_ResizeGrip, 187, 250, 50);
	setAccentColor(ImGuiCol_ResizeGripHovered, 187, 250, 170);
	setAccentColor(ImGuiCol_ResizeGripActive, 187, 250, 240);

	setAccentColor(ImGuiCol_Tab, 176, 148, 100);
	setAccentColor(ImGuiCol_TabHovered, 187, 250, 200);
	setAccentColor(ImGuiCol_TabActive, 180, 173, 255);

	setAccentColor(ImGuiCol_FrameBg, 170, 122, 138);
	setAccentColor(ImGuiCol_FrameBgHovered, 187, 170, 150);
	setAccentColor(ImGuiCol_FrameBgActive, 187, 190, 240);

	style.Colors[ImGuiCol_WindowBg] = {0.02, 0.02, 0.02, 0.6}; // dark

	setColorHSV(ImGuiCol_PlotHistogram, 119, 240, 180, 240);
	setColorHSV(ImGuiCol_PlotHistogramHovered, accentHue, 187, 250, 240);

	// style.Colors[ImGuiCol_TitleBgActive] = style.Colors[ImGuiCol_WindowBg];
	// style.Colors[ImGuiCol_TitleBg] = style.Colors[ImGuiCol_WindowBg];
	setColorHSV(ImGuiCol_TitleBgActive, 119, 240, 150, 200);
	setColorHSV(ImGuiCol_TitleBg, 119, 100, 100, 50);
	style.Colors[ImGuiCol_TitleBgCollapsed] = style.Colors[ImGuiCol_WindowBg];

	// Disable all rounding
	style.WindowRounding = 0.f;
	style.WindowBorderSize = 0.f;
	style.FrameBorderSize = 0.f;
	style.TabRounding = 0.f;
	style.PopupRounding = 0.f;
	style.GrabRounding = 0.f;
	style.ScrollbarRounding = 0.f;

	// Space a bit more vertically, makes information look less overwhelming.
	// Don't overdo it though, we intentionally want it compact.
	style.ItemSpacing = {8, 6};
	style.FramePadding = {4, 4};
	style.ItemInnerSpacing = {4, 4};
	style.CellPadding = {6, 1}; // we need this since we sometimes don't use lines

	// Center window title
	style.WindowTitleAlign = {0.5f, 0.5f};
	style.Alpha = 1.f;
}

// ~Gui
// Unregisters the gui from its device, waits for all pending draws and
// then destroys the imgui context and all vulkan handles that are
// destroyed below. Does nothing when the gui was never initialized.
Gui::~Gui() {
	if(dev_ == nullptr) {
		return;
	}

	auto& dev = *dev_;

	// TODO: needs a lock. Likely also shouldn't be here.
	dlg_assert(dev.gui == this);
	dev.gui = nullptr;

	// Make sure no draw is in flight anymore, then finish those that
	// are still marked as in use before dropping them.
	waitForDraws();
	for(auto& pending : draws_) {
		if(!pending.inUse) {
			continue;
		}

		finishedLocked(pending);
	}

	draws_.clear();

	if(imgui_) {
		ImGui::DestroyContext(imgui_);
	}

	const auto vkDev = dev.handle;
	auto& api = dev.dispatch;

	auto destroyPipe = [&](VkPipeline pipe) {
		api.DestroyPipeline(vkDev, pipe, nullptr);
	};

	// layouts
	api.DestroyDescriptorSetLayout(vkDev, dsLayout_, nullptr);
	api.DestroyPipelineLayout(vkDev, pipeLayout_, nullptr);
	api.DestroyDescriptorSetLayout(vkDev, imgOpDsLayout_, nullptr);
	api.DestroyPipelineLayout(vkDev, imgOpPipeLayout_, nullptr);
	api.DestroyDescriptorSetLayout(vkDev, histogramDsLayout_, nullptr);
	api.DestroyPipelineLayout(vkDev, histogramPipeLayout_, nullptr);

	// font atlas resources
	api.DestroyBuffer(vkDev, font_.uploadBuf, nullptr);
	api.FreeMemory(vkDev, font_.uploadMem, nullptr);
	api.DestroyImageView(vkDev, font_.view, nullptr);
	api.DestroyImage(vkDev, font_.image, nullptr);
	api.FreeMemory(vkDev, font_.mem, nullptr);

	// pipelines
	destroyPipe(pipes_.gui);
	destroyPipe(pipes_.imageBg);
	destroyPipe(pipes_.histogramPrepare);
	destroyPipe(pipes_.histogramMax);
	destroyPipe(pipes_.histogramRender);

	// one pipeline per shader image type
	for(auto type = 0u; type < ShaderImageType::count; ++type) {
		destroyPipe(pipes_.image[type]);
		destroyPipe(pipes_.readTex[type]);
		destroyPipe(pipes_.minMaxTex[type]);
		destroyPipe(pipes_.histogramTex[type]);
	}

	api.DestroyRenderPass(vkDev, rp_, nullptr);
	api.DestroyCommandPool(vkDev, commandPool_, nullptr);
}

// Renderer
// Makes sure the imgui font atlas exists as a sampled image on the device.
// On the first call this creates the atlas image (+ memory and view) and a
// host-visible upload buffer, writes the atlas pixels into the buffer,
// records the buffer-to-image copy with the required layout transitions
// into 'cb' and allocates/updates the font descriptor set.
// On all later calls this is a no-op.
// The copy is only recorded here: 'cb' must be submitted before the font
// texture is sampled. The upload buffer is kept and destroyed in ~Gui.
void Gui::ensureFontAtlas(VkCommandBuffer cb) {
	if(font_.uploaded) {
		return;
	}

	auto& dev = *this->dev_;

	ImGuiIO& io = ImGui::GetIO();
	unsigned char* pixels;
	int width, height;
	// TODO: use a8
	io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height);

	// 4 bytes per pixel (rgba8). Computed in size_t so the multiplication
	// can't overflow 'int' for large atlases.
	const size_t uploadSize = size_t(width) * size_t(height) * 4u;

	// Create atlas image
	VkImageCreateInfo ici {};
	ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
	ici.imageType = VK_IMAGE_TYPE_2D;
	ici.format = VK_FORMAT_R8G8B8A8_UNORM;
	ici.extent.width = width;
	ici.extent.height = height;
	ici.extent.depth = 1;
	ici.mipLevels = 1;
	ici.arrayLayers = 1;
	ici.samples = VK_SAMPLE_COUNT_1_BIT;
	ici.tiling = VK_IMAGE_TILING_OPTIMAL;
	ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
	ici.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	ici.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;

	VK_CHECK(dev.dispatch.CreateImage(dev.handle, &ici, nullptr, &font_.image));

	VkMemoryRequirements fontImageReq;
	dev.dispatch.GetImageMemoryRequirements(dev.handle, font_.image, &fontImageReq);

	// Atlas image lives in device-local memory.
	VkMemoryAllocateInfo iai {};
	iai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	iai.allocationSize = fontImageReq.size;
	iai.memoryTypeIndex = findLSB(fontImageReq.memoryTypeBits & dev.deviceLocalMemTypeBits);
	VK_CHECK(dev.dispatch.AllocateMemory(dev.handle, &iai, nullptr, &font_.mem));
	VK_CHECK(dev.dispatch.BindImageMemory(dev.handle, font_.image, font_.mem, 0));

	// font image view
	VkImageViewCreateInfo ivi {};
	ivi.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	ivi.image = font_.image;
	ivi.viewType = VK_IMAGE_VIEW_TYPE_2D;
	ivi.format = VK_FORMAT_R8G8B8A8_UNORM;
	ivi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	ivi.subresourceRange.levelCount = 1;
	ivi.subresourceRange.layerCount = 1;
	VK_CHECK(dev.dispatch.CreateImageView(dev.handle, &ivi, nullptr, &font_.view));

	// Create the upload buffer
	VkBufferCreateInfo bci {};
	bci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	bci.size = uploadSize;
	bci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
	VK_CHECK(dev.dispatch.CreateBuffer(dev.handle, &bci, nullptr, &font_.uploadBuf));

	VkMemoryRequirements uploadBufReq;
	dev.dispatch.GetBufferMemoryRequirements(dev.handle, font_.uploadBuf, &uploadBufReq);

	VkMemoryAllocateInfo uploadai {};
	uploadai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	uploadai.allocationSize = uploadBufReq.size;
	uploadai.memoryTypeIndex = findLSB(uploadBufReq.memoryTypeBits & dev.hostVisibleMemTypeBits);
	VK_CHECK(dev.dispatch.AllocateMemory(dev.handle, &uploadai, nullptr, &font_.uploadMem));
	VK_CHECK(dev.dispatch.BindBufferMemory(dev.handle, font_.uploadBuf, font_.uploadMem, 0));

	// Upload to Buffer
	void* map = nullptr;
	VK_CHECK(dev.dispatch.MapMemory(dev.handle, font_.uploadMem, 0, VK_WHOLE_SIZE, 0, &map));
	std::memcpy(map, pixels, uploadSize);

	// Flush the whole mapping instead of exactly 'uploadSize' bytes:
	// a flush range with explicit size must be a multiple of
	// nonCoherentAtomSize (or end exactly at the end of the allocation),
	// which 'uploadSize' does not guarantee. VK_WHOLE_SIZE is always
	// valid here since the complete allocation is mapped.
	VkMappedMemoryRange range {};
	range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
	range.memory = font_.uploadMem;
	range.offset = 0;
	range.size = VK_WHOLE_SIZE;
	VK_CHECK(dev.dispatch.FlushMappedMemoryRanges(dev.handle, 1, &range));
	dev.dispatch.UnmapMemory(dev.handle, font_.uploadMem);

	// Copy buffer to image
	// First transition the image so it can be written by the transfer.
	VkImageMemoryBarrier copyBarrier {};
	copyBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	copyBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	copyBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	copyBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
	copyBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	copyBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	copyBarrier.image = font_.image;
	copyBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	copyBarrier.subresourceRange.levelCount = 1;
	copyBarrier.subresourceRange.layerCount = 1;
	dev.dispatch.CmdPipelineBarrier(cb,
		VK_PIPELINE_STAGE_HOST_BIT,
		VK_PIPELINE_STAGE_TRANSFER_BIT,
		0,
		0, nullptr,
		0, nullptr,
		1, &copyBarrier);

	VkBufferImageCopy region = {};
	region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	region.imageSubresource.layerCount = 1;
	region.imageExtent.width = width;
	region.imageExtent.height = height;
	region.imageExtent.depth = 1;
	dev.dispatch.CmdCopyBufferToImage(cb,
		font_.uploadBuf,
		font_.image,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		1, &region);

	// Then make the copied data readable by fragment shaders.
	VkImageMemoryBarrier useBarrier {};
	useBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	useBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	useBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
	useBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
	useBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	useBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	useBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	useBarrier.image = font_.image;
	useBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	useBarrier.subresourceRange.levelCount = 1;
	useBarrier.subresourceRange.layerCount = 1;
	dev.dispatch.CmdPipelineBarrier(cb,
		VK_PIPELINE_STAGE_TRANSFER_BIT,
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
		0,
		0, nullptr,
		0, nullptr,
		1, &useBarrier);

	// create descriptor
	VkDescriptorSetAllocateInfo dsai {};
	dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
	dsai.descriptorPool = dev.dsPool;
	dsai.descriptorSetCount = 1u;
	dsai.pSetLayouts = &dsLayout_;
	VK_CHECK(dev.dispatch.AllocateDescriptorSets(dev.handle, &dsai, &dsFont_));

	// ...and update it
	VkDescriptorImageInfo dsii {};
	dsii.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	dsii.imageView = font_.view;
	dsii.sampler = dev.linearSampler;

	VkWriteDescriptorSet write {};
	write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
	write.descriptorCount = 1u;
	write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
	write.dstSet = dsFont_;
	write.pImageInfo = &dsii;

	dev.dispatch.UpdateDescriptorSets(dev.handle, 1, &write, 0, nullptr);

	// Store our identifier
	// The type 'count' marks the draw image as the font texture, it is
	// checked for when recording draw commands.
	font_.drawImage.type = ShaderImageType::count; // font
	io.Fonts->TexID = (ImTextureID) &font_.drawImage;
	font_.uploaded = true;
}

// Copies the vertex and index data of all command lists in 'drawData'
// into the (mapped) vertex/index buffers of 'draw', growing the buffers
// first if needed. The data of the individual lists is written tightly
// packed, one list after the other. Does nothing if there are no indices.
void Gui::uploadDraw(Draw& draw, const ImDrawData& drawData) {
	ZoneScoped;
	auto& dev = *this->dev_;

	const bool nothingToDraw = (drawData.TotalIdxCount == 0);
	if(nothingToDraw) {
		return;
	}

	// make sure buffers are large enough
	draw.vertexBuffer.ensure(dev,
		drawData.TotalVtxCount * sizeof(ImDrawVert),
		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	draw.indexBuffer.ensure(dev,
		drawData.TotalIdxCount * sizeof(ImDrawIdx),
		VK_BUFFER_USAGE_INDEX_BUFFER_BIT);

	// write cursors into the mapped buffers
	auto* vtxDst = reinterpret_cast<ImDrawVert*>(draw.vertexBuffer.map);
	auto* idxDst = reinterpret_cast<ImDrawIdx*>(draw.indexBuffer.map);

	for(auto l = 0; l < drawData.CmdListsCount; ++l) {
		const auto& list = *drawData.CmdLists[l];
		const auto& srcVerts = list.VtxBuffer;
		const auto& srcInds = list.IdxBuffer;

		std::memcpy(vtxDst, srcVerts.Data, srcVerts.size() * sizeof(ImDrawVert));
		std::memcpy(idxDst, srcInds.Data, srcInds.size() * sizeof(ImDrawIdx));

		vtxDst += srcVerts.Size;
		idxDst += srcInds.Size;
	}

	draw.indexBuffer.flushMap();
	draw.vertexBuffer.flushMap();
}

// Records the commands drawing 'drawData' into draw.cb.
// Expects the vertex/index data to already be present in the buffers of
// 'draw' (see uploadDraw). 'extent' is used as viewport size and to clamp
// the scissor rectangles; the framebuffer parameter is unused.
// NOTE(review): no render pass is begun here, presumably the caller
// records this inside the gui render pass - confirm.
//
// Push constant layout (layout pipeLayout_):
// - offset 0, 16 bytes: scale (vec2) and translation (vec2) that map
//   imgui coordinates to normalized device coordinates.
// - offset 16: per-image data (layer, value range, flags, level), only
//   pushed for draw commands that show an image instead of the font atlas.
void Gui::recordDraw(Draw& draw, VkExtent2D extent, VkFramebuffer,
		const ImDrawData& drawData) {
	ZoneScoped;
	DebugLabel cblbl(dev(), draw.cb, "vil:Gui:recordDraw");

	auto& dev = *dev_;

	// Without indices there is nothing to record.
	if(drawData.TotalIdxCount <= 0) {
		return;
	}

	VkViewport viewport {};
	viewport.width = extent.width;
	viewport.height = extent.height;
	viewport.maxDepth = 1.f;
	dev.dispatch.CmdSetViewport(draw.cb, 0, 1, &viewport);

	VkDeviceSize off0 = 0u;
	dev.dispatch.CmdBindVertexBuffers(draw.cb, 0, 1, &draw.vertexBuffer.buf, &off0);
	dev.dispatch.CmdBindIndexBuffer(draw.cb, draw.indexBuffer.buf, 0, VK_INDEX_TYPE_UINT16);

	float pcr[4];
	// scale
	pcr[0] = 2.0f / drawData.DisplaySize.x;
	pcr[1] = 2.0f / drawData.DisplaySize.y;
	// translate
	pcr[2] = -1.0f - drawData.DisplayPos.x * pcr[0];
	pcr[3] = -1.0f - drawData.DisplayPos.y * pcr[1];
	auto pcrStages = VK_SHADER_STAGE_VERTEX_BIT |
		VK_SHADER_STAGE_FRAGMENT_BIT |
		VK_SHADER_STAGE_COMPUTE_BIT;
	dev.dispatch.CmdPushConstants(draw.cb, pipeLayout_, pcrStages,
		0, sizeof(pcr), pcr);

	// Running offsets into the tightly packed index/vertex buffers.
	auto idxOff = 0u;
	auto vtxOff = 0u;
	for(auto i = 0; i < drawData.CmdListsCount; ++i) {
		auto& cmds = *drawData.CmdLists[i];

		for(auto j = 0; j < cmds.CmdBuffer.Size; ++j) {
			auto& cmd = cmds.CmdBuffer[j];
			if(cmd.UserCallback) {
				dlg_assert(cmd.UserCallback != ImDrawCallback_ResetRenderState);
				cmd.UserCallback(&cmds, &cmd);

				// The callback might have recorded arbitrary commands,
				// reset the state we need.
				dev.dispatch.CmdPushConstants(draw.cb, pipeLayout_,
					pcrStages, 0, sizeof(pcr), pcr);
				dev.dispatch.CmdSetViewport(draw.cb, 0, 1, &viewport);
				dev.dispatch.CmdBindVertexBuffers(draw.cb, 0, 1, &draw.vertexBuffer.buf, &off0);
				dev.dispatch.CmdBindIndexBuffer(draw.cb, draw.indexBuffer.buf, 0, VK_INDEX_TYPE_UINT16);
				continue;
			}

			// Clip rectangle relative to the display origin, clamped
			// to the area we render to. Clamping min and max separately
			// (instead of only the offset) keeps the far edge of the
			// rectangle where imgui wants it.
			const float clipMinX = std::max(cmd.ClipRect.x - drawData.DisplayPos.x, 0.f);
			const float clipMinY = std::max(cmd.ClipRect.y - drawData.DisplayPos.y, 0.f);
			const float clipMaxX = std::min(cmd.ClipRect.z - drawData.DisplayPos.x,
				float(extent.width));
			const float clipMaxY = std::min(cmd.ClipRect.w - drawData.DisplayPos.y,
				float(extent.height));

			// Nothing visible. Skip the draw but still advance the
			// index offset so following commands stay correct.
			if(clipMaxX <= clipMinX || clipMaxY <= clipMinY) {
				idxOff += cmd.ElemCount;
				continue;
			}

			VkDescriptorSet ds = dsFont_;
			VkPipeline pipe = pipes_.gui;
			auto img = (DrawGuiImage*) cmd.TextureId;

			// The type 'count' is used to mark the font texture.
			if(img && img->type != ShaderImageType::count) {
				ds = img->ds;
				dlg_assert(img->type < ShaderImageType::count);
				pipe = pipes_.image[img->type];

				// bind push constant data
				struct PcrImageData {
					float layer;
					float valMin;
					float valMax;
					u32 flags;
					float level;
				} imgPcr = {
					img->layer,
					img->minValue,
					img->maxValue,
					img->flags,
					img->level,
				};

				// placed directly behind scale/translation (16 bytes)
				dev.dispatch.CmdPushConstants(draw.cb, pipeLayout_,
					pcrStages, 16,
					sizeof(imgPcr), &imgPcr);
			}

			dev.dispatch.CmdBindPipeline(draw.cb, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe);
			dev.dispatch.CmdBindDescriptorSets(draw.cb, VK_PIPELINE_BIND_POINT_GRAPHICS,
				pipeLayout_, 0, 1, &ds, 0, nullptr);

			VkRect2D scissor {};
			scissor.offset.x = int32_t(clipMinX);
			scissor.offset.y = int32_t(clipMinY);
			scissor.extent.width = uint32_t(clipMaxX - clipMinX);
			scissor.extent.height = uint32_t(clipMaxY - clipMinY);
			dev.dispatch.CmdSetScissor(draw.cb, 0, 1, &scissor);

			dev.dispatch.CmdDrawIndexed(draw.cb, cmd.ElemCount, 1, idxOff, vtxOff, 0);
			idxOff += cmd.ElemCount;
		}

		vtxOff += cmds.VtxBuffer.Size;
	}
}

// Returns the names of all features listed below that are set in
// dev.enabledFeatures, in the order in which they are listed.
std::vector<std::string> enabledFeatures(Device& dev) {
	std::vector<std::string> names;
	const auto& feats = dev.enabledFeatures;

	// Appends the name when the feature flag is set.
	auto addIfEnabled = [&names](auto enabled, const char* name) {
		if(enabled) {
			names.push_back(name);
		}
	};

	// Stringifies the member name so list and output can't get out of sync.
#define VIL_ADD_FEATURE(member) addIfEnabled(feats.member, #member)
	VIL_ADD_FEATURE(robustBufferAccess);
	VIL_ADD_FEATURE(fullDrawIndexUint32);
	VIL_ADD_FEATURE(imageCubeArray);
	VIL_ADD_FEATURE(independentBlend);
	VIL_ADD_FEATURE(geometryShader);
	VIL_ADD_FEATURE(tessellationShader);
	VIL_ADD_FEATURE(sampleRateShading);
	VIL_ADD_FEATURE(dualSrcBlend);
	VIL_ADD_FEATURE(logicOp);
	VIL_ADD_FEATURE(multiDrawIndirect);
	VIL_ADD_FEATURE(drawIndirectFirstInstance);
	VIL_ADD_FEATURE(depthClamp);
	VIL_ADD_FEATURE(depthBiasClamp);
	VIL_ADD_FEATURE(fillModeNonSolid);
	VIL_ADD_FEATURE(depthBounds);
	VIL_ADD_FEATURE(wideLines);
	VIL_ADD_FEATURE(largePoints);
	VIL_ADD_FEATURE(alphaToOne);
	VIL_ADD_FEATURE(multiViewport);
	VIL_ADD_FEATURE(samplerAnisotropy);
	VIL_ADD_FEATURE(textureCompressionETC2);
	VIL_ADD_FEATURE(textureCompressionASTC_LDR);
	VIL_ADD_FEATURE(textureCompressionBC);
	VIL_ADD_FEATURE(occlusionQueryPrecise);
	VIL_ADD_FEATURE(pipelineStatisticsQuery);
	VIL_ADD_FEATURE(vertexPipelineStoresAndAtomics);
	VIL_ADD_FEATURE(fragmentStoresAndAtomics);
	VIL_ADD_FEATURE(shaderTessellationAndGeometryPointSize);
	VIL_ADD_FEATURE(shaderImageGatherExtended);
	VIL_ADD_FEATURE(shaderStorageImageExtendedFormats);
	VIL_ADD_FEATURE(shaderStorageImageMultisample);
	VIL_ADD_FEATURE(shaderStorageImageReadWithoutFormat);
	VIL_ADD_FEATURE(shaderStorageImageWriteWithoutFormat);
	VIL_ADD_FEATURE(shaderUniformBufferArrayDynamicIndexing);
	VIL_ADD_FEATURE(shaderSampledImageArrayDynamicIndexing);
	VIL_ADD_FEATURE(shaderStorageBufferArrayDynamicIndexing);
	VIL_ADD_FEATURE(shaderStorageImageArrayDynamicIndexing);
	VIL_ADD_FEATURE(shaderClipDistance);
	VIL_ADD_FEATURE(shaderCullDistance);
	VIL_ADD_FEATURE(shaderFloat64);
	VIL_ADD_FEATURE(shaderInt64);
	VIL_ADD_FEATURE(shaderInt16);
	VIL_ADD_FEATURE(shaderResourceResidency);
	VIL_ADD_FEATURE(shaderResourceMinLod);
	VIL_ADD_FEATURE(sparseBinding);
	VIL_ADD_FEATURE(sparseResidencyBuffer);
	VIL_ADD_FEATURE(sparseResidencyImage2D);
	VIL_ADD_FEATURE(sparseResidencyImage3D);
	VIL_ADD_FEATURE(sparseResidency2Samples);
	VIL_ADD_FEATURE(sparseResidency4Samples);
	VIL_ADD_FEATURE(sparseResidency8Samples);
	VIL_ADD_FEATURE(sparseResidency16Samples);
	VIL_ADD_FEATURE(sparseResidencyAliased);
	VIL_ADD_FEATURE(variableMultisampleRate);
	VIL_ADD_FEATURE(inheritedQueries);
#undef VIL_ADD_FEATURE

	return names;
}

// Draws the overview tab: instance/application information, physical
// device information, the enabled extensions and features, present
// timings of the device's swapchain (if any) and, unless disabled via
// the VIL_DEBUG environment variable, internal debug statistics and
// debug toggles. The draw parameter is unused.
void Gui::drawOverviewUI(Draw& draw) {
	(void) draw;

	auto& dev = *this->dev_;
	auto& ini = *dev.ini;

	// instance info
	// left column: labels, right column: values
	ImGui::Columns(2);

	ImGui::Text("API Version");

	if(ini.app.valid) {
		ImGui::Text("Application");
		ImGui::Text("Engine");
	}

	ImGui::NextColumn();

	ImGui::Text("%d.%d.%d",
		VK_VERSION_MAJOR(ini.app.apiVersion),
		VK_VERSION_MINOR(ini.app.apiVersion),
		VK_VERSION_PATCH(ini.app.apiVersion));

	if(ini.app.valid) {
		ImGui::Text("%s %d.%d.%d", ini.app.name.c_str(),
			VK_VERSION_MAJOR(ini.app.version),
			VK_VERSION_MINOR(ini.app.version),
			VK_VERSION_PATCH(ini.app.version));
		ImGui::Text("%s %d.%d.%d", ini.app.engineName.c_str(),
			VK_VERSION_MAJOR(ini.app.engineVersion),
			VK_VERSION_MINOR(ini.app.engineVersion),
			VK_VERSION_PATCH(ini.app.engineVersion));
	}

	ImGui::Columns();

	ImGui::Separator();

	// phdev info
	ImGui::Columns(2);

	// physical device info
	ImGui::Text("Physical device, API version");
	ImGui::Text("Driver version");

	ImGui::NextColumn();

	ImGui::Text("%s %d.%d.%d", dev.props.deviceName,
		VK_VERSION_MAJOR(dev.props.apiVersion),
		VK_VERSION_MINOR(dev.props.apiVersion),
		VK_VERSION_PATCH(dev.props.apiVersion));
	ImGui::Text("%d.%d.%d",
		VK_VERSION_MAJOR(dev.props.driverVersion),
		VK_VERSION_MINOR(dev.props.driverVersion),
		VK_VERSION_PATCH(dev.props.driverVersion));

	ImGui::Columns();
	ImGui::Separator();

	// Enabled instance extensions
	// The tree nodes below use a more compact layout.
	ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, ImVec2(2.f, 3.f));
	ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(4.f, 4.f));

	auto iniExtLbl = dlg::format("{} instance extensions enabled", dev.ini->extensions.size());
	auto tnFlags = ImGuiTreeNodeFlags_FramePadding;
	if(ImGui::TreeNodeEx(iniExtLbl.c_str(), tnFlags)) {
		ImGui::Indent();
		for(auto& ext : dev.ini->extensions) {
			imGuiText("{}", ext);
		}
		ImGui::Unindent();
		ImGui::TreePop();
	}

	// Enabled device extensions
	auto devExtLbl = dlg::format("{} device extensions enabled", dev.appExts.size());
	if(ImGui::TreeNodeEx(devExtLbl.c_str(), tnFlags)) {
		ImGui::Indent();
		for(auto& ext : dev.appExts) {
			imGuiText("{}", ext);
		}
		ImGui::Unindent();
		ImGui::TreePop();
	}

	// Enabled device features
	auto features = enabledFeatures(dev);
	auto featuresLbl = dlg::format("{} device features enabled", features.size());
	if(ImGui::TreeNodeEx(featuresLbl.c_str(), tnFlags)) {
		ImGui::Indent();
		for(auto& f : features) {
			imGuiText("{}", f);
		}
		ImGui::Unindent();
		ImGui::TreePop();
	}

	ImGui::PopStyleVar(2);
	ImGui::Separator();

	// swapchain stuff
	// Take our own reference under the lock. Everything below must use
	// this reference since dev.swapchain might change at any time.
	IntrusivePtr<Swapchain> swapchain;

	{
		std::lock_guard lock(dev.mutex);
		swapchain = dev.swapchain;
	}

	if(swapchain) {
		if(ImGui::Button("View per-frame submissions")) {
			cbGui().showSwapchainSubmissions();
			activateTab(Tab::commandBuffer);
		} else if(showHelp && ImGui::IsItemHovered()) {
			ImGui::SetTooltip(
				"This will open the tab to view all submissions done between two\n"
				"presents to the main swapchain. You can alternatively select\n"
				"specific CommandBuffers from the 'Resources' tab to view their content.");
		}

		// show timings
		// frame timings converted to milliseconds
		std::vector<float> hist;

		{
			std::lock_guard lock(dev.mutex);
			for(auto& timing : swapchain->frameTimings) {
				using MS = std::chrono::duration<float, std::ratio<1, 1000>>;
				hist.push_back(std::chrono::duration_cast<MS>(timing).count());
			}
		}

		// TODO: the histogram has several problems:
		// - very high outliers will render all other timings useless (since
		//   they are scaled down too much)
		// - the variable scaling makes it weird to get an absolute
		//   idea of the timings, only relative is possible
		if(!hist.empty()) {
			ImGui::Text("Present timings:");

			// Fully transparent frame background in all of its states
			// (normal, active, hovered).
			ImGui::PushStyleColor(ImGuiCol_FrameBg, {0.f, 0.f, 0.f, 0.f});
			ImGui::PushStyleColor(ImGuiCol_FrameBgActive, {0.f, 0.f, 0.f, 0.f});
			ImGui::PushStyleColor(ImGuiCol_FrameBgHovered, {0.f, 0.f, 0.f, 0.f});

			float w = ImGui::GetContentRegionAvail().x;
			ImGui::PlotHistogram("", hist.data(), int(hist.size()),
				0, nullptr, 0.f, FLT_MAX, {w, 100});

			ImGui::PopStyleColor(3);
		}
	}

	// pretty much just own debug stuff
	ImGui::Separator();

	if(checkEnvBinary("VIL_DEBUG", true)) {
		auto& stats = DebugStats::get();
		imGuiText("alive records: {}", stats.aliveRecords);
		imGuiText("alive descriptor sets: {}", stats.aliveDescriptorSets);
		imGuiText("alive descriptor copies: {}", stats.aliveDescriptorCopies);
		imGuiText("alive buffers: {}", stats.aliveBuffers);
		imGuiText("alive image views: {}", stats.aliveImagesViews);
		imGuiText("threadContext memory: {} MB", stats.threadContextMem / (1024.f * 1024.f));
		imGuiText("command memory: {} MB", stats.commandMem / (1024.f * 1024.f));
		imGuiText("ds copy memory: {} MB", stats.descriptorCopyMem / (1024.f * 1024.f));
		imGuiText("ds pool memory: {} MB", stats.descriptorPoolMem / (1024.f * 1024.f));
		imGuiText("alive hook records: {}", stats.aliveHookRecords);
		imGuiText("alive hook states: {}", stats.aliveHookStates);
		imGuiText("layer buffer memory: {} MB", stats.ownBufferMem / (1024.f * 1024.f));
		imGuiText("layer image memory: {} MB", stats.copiedImageMem / (1024.f * 1024.f));
		ImGui::Separator();
		imGuiText("timeline semaphores: {}", dev.timelineSemaphores);
		imGuiText("transform feedback: {}", dev.transformFeedback);
		imGuiText("wrap command buffers: {}", HandleDesc<VkCommandBuffer>::wrap);
		imGuiText("wrap image view: {}", HandleDesc<VkImageView>::wrap);
		imGuiText("wrap buffers: {}", HandleDesc<VkBuffer>::wrap);
		imGuiText("wrap descriptor set: {}", HandleDesc<VkDescriptorSet>::wrap);
		imGuiText("wrap samplers: {}", HandleDesc<VkSampler>::wrap);
		imGuiText("wrap device: {}", HandleDesc<VkDevice>::wrap);
		ImGui::Separator();
		imGuiText("submission counter: {}", dev.submissionCounter);
		imGuiText("pending submissions: {}", dev.pending.size());
		imGuiText("fence pool size: {}", dev.fencePool.size());
		imGuiText("semaphore pool size: {}", dev.semaphorePool.size());
		imGuiText("reset semaphores size: {}", dev.resetSemaphores.size());

		ImGui::Separator();

		// The checkboxes operate on a local copy of the atomic value
		// that is written back afterwards.
		if(dev.timelineSemaphores) {
			auto val = dev.doFullSync.load();
			ImGui::Checkbox("Full-Sync", &val);
			dev.doFullSync.store(val);
			if(ImGui::IsItemHovered() && showHelp) {
				ImGui::SetTooltip("Causes over-conservative synchronization of\n"
					"inserted layer commands.\n"
					"Might fix synchronization in some corner cases\n"
					"and is needed when your application accesses buffers\n"
					"by just using device addresses");
			}
		}

		auto val = dev.captureCmdStack.load();
		ImGui::Checkbox("Capture Command Callstacks", &val);
		dev.captureCmdStack.store(val);
		if(ImGui::IsItemHovered() && showHelp) {
			ImGui::SetTooltip("Captures and shows callstacks of each command");
		}

		ImGui::Checkbox("Show ImGui Demo", &showImguiDemo_);
	}
}

// Draws the "Memory" tab: a table with one row per memory heap of the
// device, showing the heap size and the sum of the sizes of all
// device memory objects currently tracked by the device.
// When the VK_EXT_memory_budget extension name is found in the device's
// extension list, the heap budget and heap usage queried from the
// physical device are shown in two additional columns.
void Gui::drawMemoryUI(Draw&) {
	// TODO:
	// - display graphs instead of just the table
	// - show memory types?
	// - show the biggest actual allocations; some more statistics in general

	// accumulate allocation sizes per heap
	// TODO: cache this.
	auto& memProps = dev().memProps;
	VkDeviceSize heapAlloc[VK_MAX_MEMORY_HEAPS] {};

	{
		// deviceMemories is accessed under the device mutex
		std::lock_guard lock(dev().mutex);
		for(auto& [_, mem] : dev().deviceMemories.inner) {
			auto heap = memProps.memoryTypes[mem->typeIndex].heapIndex;
			heapAlloc[heap] += mem->size;
		}
	}

	VkPhysicalDeviceMemoryBudgetPropertiesEXT memBudget {};
	auto hasMemBudget = contains(dev().allExts, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
	auto cols = 3u;
	if(hasMemBudget) {
		auto& ini = *dev().ini;
		dlg_assert(ini.dispatch.GetPhysicalDeviceMemoryProperties2);

		memBudget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;

		// the budget struct is filled by chaining it into the properties query
		VkPhysicalDeviceMemoryProperties2 memProps2 {};
		memProps2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
		memProps2.pNext = &memBudget;

		ini.dispatch.GetPhysicalDeviceMemoryProperties2(dev().phdev, &memProps2);

		cols += 2;
	}

	auto flags = ImGuiTableFlags_Resizable | ImGuiTableFlags_Borders;
	if(ImGui::BeginTable("Memory Heaps", cols, flags)) {
		ImGui::TableSetupColumn("Heap");

		ImGui::TableSetupColumn("Heap Size");
		ImGui::TableSetupColumn("Sum of Allocs");

		if(hasMemBudget) {
			ImGui::TableSetupColumn("Heap Budget");
			ImGui::TableSetupColumn("Heap Own Usage");
		}

		ImGui::TableHeadersRow();

		// Prints a byte count in MB. Values up to 10 MB get two decimals,
		// larger ones none. The precision is chosen from the converted
		// value; comparing the (constant) block size against the threshold
		// would always select zero decimals.
		auto printVal = [&](auto bytes) {
			// TODO: not always use MB, switch dynamically based on size
			constexpr auto block = 1024.f * 1024.f;
			const auto mb = bytes / block;
			const auto prec = mb > 10.f ? 0u : 2u;
			imGuiText("{}{}{} MB", std::fixed, std::setprecision(prec), mb);
		};

		for(auto i = 0u; i < memProps.memoryHeapCount; ++i) {
			ImGui::TableNextRow();
			ImGui::TableNextColumn();

			// mark device-local heaps
			auto add = "";
			if(memProps.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
				add = " [dev]";
			}

			imGuiText("{}{}", i, add);

			// all values below are in bytes, printVal converts them
			ImGui::TableNextColumn();
			printVal(memProps.memoryHeaps[i].size);

			ImGui::TableNextColumn();
			printVal(heapAlloc[i]);

			if(hasMemBudget) {
				ImGui::TableNextColumn();
				printVal(memBudget.heapBudget[i]);

				ImGui::TableNextColumn();
				printVal(memBudget.heapUsage[i]);
			}
		}

		ImGui::EndTable();
	}
}

// Builds the ImGui frame for the overlay: starts a new ImGui frame,
// submits the main "Vulkan Introspection" window with its tab bar
// (Overview, Resources, Memory and, when a record is selected or the
// command gui is in swapchain mode, Commands) and finally ends and renders
// the ImGui frame so that draw data can be retrieved by the caller.
// - draw: the Draw forwarded to the individual tabs
// - fullscreen: when true, the window covers the whole display and has
//   no decoration; otherwise it gets an initial position/size once.
void Gui::draw(Draw& draw, bool fullscreen) {
	ZoneScoped;

	ImGui::NewFrame();

	// Submit the debug windows *before* configuring our main window.
	// SetNextWindowPos/SetNextWindowSize only apply to the next Begin()
	// call and the demo/metrics windows call Begin() themselves, so
	// submitting them in between would consume the placement meant for
	// the main window.
	if(showImguiDemo_) {
		ImGui::ShowDemoWindow();
		ImGui::ShowMetricsWindow();
		// ImGui::ShowAboutWindow();
	}

	unsigned flags = ImGuiWindowFlags_NoCollapse;
	if(fullscreen) {
		ImGui::SetNextWindowPos({0, 0});
		ImGui::SetNextWindowSize(ImGui::GetIO().DisplaySize);
		flags = ImGuiWindowFlags_NoDecoration;
	} else {
		ImGui::SetNextWindowPos({80, 80}, ImGuiCond_Once);
		ImGui::SetNextWindowSize({900, 550}, ImGuiCond_Once);
	}

	// Returns the tab item flags for the given tab. After activateTab()
	// reset the counter, the requested tab is force-selected for the
	// next two times this is evaluated for it.
	auto checkSelectTab = [&](Tab tab) {
		int tabFlags = 0;
		if(activeTab_ == tab && activateTabCounter_ < 2) {
			tabFlags = ImGuiTabItemFlags_SetSelected;
			++activateTabCounter_;
		}

		return tabFlags;
	};

	// TODO: needed?
	// if(this->focused) {
	// 	ImGui::SetNextWindowFocus();
	// } else {
	// 	ImGui::SetWindowFocus(nullptr);
	// }
	if(this->unfocus) {
		ImGui::SetWindowFocus(nullptr);
		this->unfocus = false;
	}

	// NOTE: activeTab_ still holds the tab that was active when this
	// function last ran (or the one requested via activateTab).
	if(activeTab_ != Tab::commandBuffer) {
		// deactivate hook when we aren't in the commandbuffer tab
		dev().commandHook->freeze.store(true);
	}

	if(ImGui::Begin("Vulkan Introspection", nullptr, flags)) {
		// remembered for later use (e.g. the blur scissor in tryRender)
		windowPos_ = ImGui::GetWindowPos();
		windowSize_ = ImGui::GetWindowSize();

		if(ImGui::BeginTabBar("MainTabBar")) {
			if(ImGui::BeginTabItem("Overview")) {
				activeTab_ = Tab::overview;
				drawOverviewUI(draw);
				ImGui::EndTabItem();
			}

			if(ImGui::BeginTabItem("Resources", nullptr, checkSelectTab(Tab::resources))) {
				// When switching towards the resources tab, make sure to refresh
				// the list of available resources, not showing "<Destroyed>"
				if(activeTab_ != Tab::resources) {
					tabs_.resources->firstUpdate_ = true;
					activeTab_ = Tab::resources;
				}

				tabs_.resources->draw(draw);
				ImGui::EndTabItem();
			}

			if(ImGui::BeginTabItem("Memory", nullptr, checkSelectTab(Tab::memory))) {
				activeTab_ = Tab::memory;
				drawMemoryUI(draw);
				ImGui::EndTabItem();
			}

			// the commands tab is only offered when there is something to show
			if(tabs_.cb->record_ || tabs_.cb->mode_ == CommandBufferGui::UpdateMode::swapchain) {
				if(ImGui::BeginTabItem("Commands", nullptr, checkSelectTab(Tab::commandBuffer))) {
					activeTab_ = Tab::commandBuffer;
					tabs_.cb->draw(draw);
					ImGui::EndTabItem();
				}
			}

#ifdef VIL_VIZ_LCS
			if(ImGui::BeginTabItem("VizLCS", nullptr)) {
				activeTab_ = Tab::overview; // HACK
				static VizLCS vizlcs;
				vizlcs.draw();
				ImGui::EndTabItem();
			}
#endif // VIL_VIZ_LCS

			ImGui::EndTabBar();
		}
	}

	// End() must be called regardless of what Begin() returned
	ImGui::End();

	ImGui::EndFrame();
	ImGui::Render();
}

void Gui::destroyed(const Handle& handle, VkObjectType type) {
	(void) type; // TODO

	ExtZoneScoped;
	assertOwned(dev().mutex);

	// Make sure that all our submissions that use the given handle have
	// finished.
	std::vector<VkFence> fences;
	std::vector<Draw*> draws;
	for(auto& draw : draws_) {
		if(!draw.inUse) {
			continue;
		}

		auto it = find(draw.usedHandles, &handle);
		if(it != draw.usedHandles.end()) {
			fences.push_back(draw.fence);
			draws.push_back(&draw);
		}
	}

	if(!fences.empty()) {
		VK_CHECK(dev().dispatch.WaitForFences(dev().handle, u32(fences.size()),
			fences.data(), true, UINT64_MAX));
	}

	// important that we *first* wait for the submission, then forward
	// this since e.g. the resources gui may destroy handles based on it
	tabs_.resources->destroyed(handle);
	tabs_.cb->destroyed(handle);

	// NOTE: I guess we could finish the draws here? But shouldn't be
	// a problem to wait until the next frame.
}

// Requests the given tab to become the selected one.
// Resetting activateTabCounter_ makes draw() pass the 'SetSelected'
// flag for this tab on its next evaluations.
void Gui::activateTab(Tab tab) {
	// Switching towards the resources tab: force a refresh of the list
	// of available resources so it does not show "<Destroyed>" entries.
	const bool toResources = (tab == Tab::resources);
	if(toResources) {
		tabs_.resources->firstUpdate_ = true;
	}

	activateTabCounter_ = 0u;
	activeTab_ = tab;
}

// Records the whole gui frame into draw.cb and submits it to the gui queue.
// Steps: process finished application submissions, record the ui
// (font atlas, ImGui draw data upload, optional blur, render pass),
// then - under the device mutex - validate the used handles, build the
// synchronization (full-sync or legacy) and submit.
// Returns:
// - VK_SUCCESS when the command buffer was submitted; the draw is then
//   marked as in use.
// - VK_INCOMPLETE when a buffer/image used by the draw lost its handle
//   while recording; nothing was submitted and the caller should retry.
// - any other error returned by the synchronization setup or by
//   vkQueueSubmit; nothing is in flight for this draw in that case.
// On every non-success return the draw's onFinish callbacks are invoked
// with 'false' and its per-draw state is cleared.
VkResult Gui::tryRender(Draw& draw, FrameInfo& info) {
	// Resets the per-draw state of a draw that was recorded but will
	// not be (or failed to be) submitted.
	auto cleanupUnfinished = [](Draw& draw) {
		for(auto& fcb : draw.onFinish) {
			fcb(draw, false);
		}

		draw.onFinish.clear();
		draw.usedHandles.clear();
		draw.usedHookState.reset();
	};

	// clean up finished application submissions
	// not strictly needed here but might make additional information
	// available to command viewer
	{
		std::lock_guard devMutex(dev().mutex);
		for(auto it = dev().pending.begin(); it != dev().pending.end();) {
			auto& subm = *it;
			// when checkLocked returns an iterator, continue from there
			if(auto nit = checkLocked(*subm); nit) {
				it = *nit;
				continue;
			}

			++it; // already increment to next one so we can't miss it
		}
	}

	// draw the ui
	VkCommandBufferBeginInfo cbBegin {};
	cbBegin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	VK_CHECK(dev().dispatch.BeginCommandBuffer(draw.cb, &cbBegin));
	DebugLabel cblbl(dev(), draw.cb, "vil:Gui:draw");

	ensureFontAtlas(draw.cb);

	this->draw(draw, info.fullscreen);
	auto& drawData = *ImGui::GetDrawData();
	this->uploadDraw(draw, drawData);

	// optionally record the blur of the swapchain image
	auto blurred = false;
	if(blur_.dev) {
		auto& sc = dev().swapchains.get(info.swapchain);
		if(sc.supportsSampling) {
			vil::blur(blur_, draw.cb, info.imageIdx, {}, {});
			blurred = true;
		}
	}

	// General barrier to make sure all past submissions writing resources
	// we read are done. Not needed when we don't read a device resource.
	// PERF: could likely at least give a better dstAccessMask
	if(!draw.usedHandles.empty()) {
		VkMemoryBarrier memb {};
		memb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
		memb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
		memb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;

		dev().dispatch.CmdPipelineBarrier(draw.cb,
			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
			VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
			0, 1, &memb, 0, nullptr, 0, nullptr);
	}

	// one-shot recorders registered via addPreRender
	for(auto& cb : preRender_) {
		cb(draw);
	}
	preRender_.clear();

	// begin the gui render pass
	VkRenderPassBeginInfo rpBegin {};
	rpBegin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
	rpBegin.renderArea.extent = info.extent;
	rpBegin.renderPass = rp_;
	rpBegin.framebuffer = info.fb;

	VkClearValue clearValues[2] {};

	// color attachment (if we don't clear it, this value is ignored).
	// see render pass creation
	clearValues[0].color = {{0.f, 0.f, 0.f, 1.f}};

	// our depth attachment, always clear that.
	clearValues[1].depthStencil = {1.f, 0u};

	rpBegin.pClearValues = clearValues;
	rpBegin.clearValueCount = 2u;

	dev().dispatch.CmdBeginRenderPass(draw.cb, &rpBegin, VK_SUBPASS_CONTENTS_INLINE);

	// draw the blurred image, restricted to the area of the gui window
	if(blurred) {
		VkRect2D scissor;
		// scissor.offset = {};
		// scissor.extent = info.extent;
		scissor.offset.x = std::max(windowPos_.x, 0.f);
		scissor.offset.y = std::max(windowPos_.y, 0.f);

		// The extent is limited by the visible part of the window and by
		// the space left in the framebuffer *starting at the clamped
		// offset*. Clamp to zero so a window entirely outside of the
		// framebuffer never yields a negative value that would then be
		// converted to an unsigned extent.
		scissor.extent.width = std::max(0.f, std::min(
			windowSize_.x + windowPos_.x - scissor.offset.x,
			float(info.extent.width) - scissor.offset.x));
		scissor.extent.height = std::max(0.f, std::min(
			windowSize_.y + windowPos_.y - scissor.offset.y,
			float(info.extent.height) - scissor.offset.y));

		VkViewport viewport;
		viewport.minDepth = 0.f;
		viewport.maxDepth = 1.f;
		viewport.x = 0u;
		viewport.y = 0u;
		viewport.width = info.extent.width;
		viewport.height = info.extent.height;
		// viewport.x = windowPos_.x;
		// viewport.y = windowPos_.y;
		// viewport.width = windowSize_.x;
		// viewport.height = windowSize_.y;

		dev().dispatch.CmdSetScissor(draw.cb, 0u, 1u, &scissor);
		dev().dispatch.CmdSetViewport(draw.cb, 0u, 1u, &viewport);

		float pcr[4];
		// scale
		pcr[0] = 1.f;
		pcr[1] = 1.f;
		// translate
		pcr[2] = 0.f;
		pcr[3] = 0.f;

		dev().dispatch.CmdBindPipeline(draw.cb, VK_PIPELINE_BIND_POINT_GRAPHICS, pipes_.gui);
		auto pcrStages = VK_SHADER_STAGE_VERTEX_BIT |
			VK_SHADER_STAGE_FRAGMENT_BIT |
			VK_SHADER_STAGE_COMPUTE_BIT;
		dev().dispatch.CmdPushConstants(draw.cb, pipeLayout_,
			pcrStages, 0, sizeof(pcr), pcr);
		dev().dispatch.CmdBindDescriptorSets(draw.cb, VK_PIPELINE_BIND_POINT_GRAPHICS,
			pipeLayout_, 0u, 1u, &blurDs_, 0, nullptr);
		VkDeviceSize off = 0u;
		dev().dispatch.CmdBindVertexBuffers(draw.cb, 0u, 1u, &blur_.vertices.buf, &off);
		dev().dispatch.CmdDraw(draw.cb, 6, 1, 0, 0);
	}

	this->recordDraw(draw, info.extent, info.fb, drawData);

	dev().dispatch.CmdEndRenderPass(draw.cb);

	// one-shot recorders registered via addPostRender
	for(auto& cb : postRender_) {
		cb(draw);
	}
	postRender_.clear();

	// General barrier to make sure all our reading is done before
	// future application submissions to this queue.
	// Not needed when we don't read a device resource.
	// PERF: could likely at least give a better srcAccessMask
	if(!draw.usedHandles.empty()) {
		VkMemoryBarrier memb {};
		memb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
		memb.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
		memb.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;

		dev().dispatch.CmdPipelineBarrier(draw.cb,
			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
			VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
			0, 1, &memb, 0, nullptr, 0, nullptr);
	}

	// checked like BeginCommandBuffer above
	VK_CHECK(dev().dispatch.EndCommandBuffer(draw.cb));

	// == Critical section ==
	// Important we already lock this mutex here since we need to make
	// sure no new submissions are done by application while we process
	// and evaluate the pending submissions
	// NOTE: lock order is important here! First lock device mutex,
	// later on lock queue mutex, that's how we must always do it.
	std::lock_guard devMutex(dev().mutex);

	// Check whether one of the buffers/images we recorded commands for
	// has no api handle anymore. In that case the recording can't be
	// submitted.
	auto invalidated = false;
	for(auto* handle : draw.usedHandles) {
		if(handle->objectType == VK_OBJECT_TYPE_BUFFER) {
			if(!static_cast<Buffer*>(handle)->handle) {
				invalidated = true;
				break;
			}
		} else if(handle->objectType == VK_OBJECT_TYPE_IMAGE) {
			if(!static_cast<Image*>(handle)->handle) {
				invalidated = true;
				break;
			}
		} else {
			dlg_error("unimplemented");
		}
	}

	if(invalidated) {
		cleanupUnfinished(draw);
		return VK_INCOMPLETE;
	}

	// == Submit batch ==
	ZoneScopedN("BuildSubmission");

	VkSubmitInfo submitInfo {};
	submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
	submitInfo.commandBufferCount = 1u;
	submitInfo.pCommandBuffers = &draw.cb;

	// NOTE: could alternatively retrieve all submissions via
	// handle->refRecords->cb->pending (and handle->descriptors->...)
	// but this should be faster, there are usually only a small
	// number of pending submissions while there might be more recordings
	// referencing a handle.

	// start with the semaphores passed in by the caller
	waitSemaphores_.clear();
	waitSemaphores_.insert(waitSemaphores_.end(),
		info.waitSemaphores.begin(), info.waitSemaphores.end());

	waitStages_.resize(info.waitSemaphores.size());
	std::fill_n(waitStages_.begin(), info.waitSemaphores.size(),
		VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);

	// signalSemaphores_[0] is always the (binary) present semaphore
	signalSemaphores_.clear();
	signalSemaphores_.emplace_back(draw.presentSemaphore);

	if(dev().doFullSync) {
		addFullSync(draw, submitInfo);
	} else {
		auto res = addLegacySync(draw, submitInfo);
		if(res != VK_SUCCESS) {
			dlg_assert(res != VK_INCOMPLETE);
			// Nothing gets submitted: reset the draw like on a failed
			// submission so its callbacks don't linger until the next use.
			cleanupUnfinished(draw);
			return res;
		}
	}

	VkResult res;

	{
		ZoneScopedN("dispatch.QueueSubmit");

		// PERF: when using timeline semaphores we don't need a
		// fence and can just use the timeline semaphore
		std::lock_guard queueLock(dev().queueMutex);
		res = dev().dispatch.QueueSubmit(usedQueue().handle,
			1u, &submitInfo, draw.fence);
	}

	if(res != VK_SUCCESS) {
		dlg_error("vkQueueSubmit error: {}", vk::name(res));
		dlg_assert(res != VK_INCOMPLETE);
		cleanupUnfinished(draw);
		return res;
	}

	if(dev().timelineSemaphores) {
		usedQueue().lastLayerSubmission = draw.lastSubmissionID;
	}

	draw.inUse = true;
	draw.futureSemaphoreSignaled = true;
	draw.futureSemaphoreUsed = false;
	lastDraw_ = &draw;

	return VK_SUCCESS;
}

// Renders and presents one gui frame.
// Picks (or creates) a free Draw, updates the blur resources when the
// swapchain changed, feeds display size and delta time to ImGui, records
// and submits the gui via tryRender (retrying while it reports
// VK_INCOMPLETE) and finally presents the swapchain image on
// info.presentQueue, waiting on the draw's present semaphore.
// Returns VK_SUCCESS when presenting succeeded (VK_SUBOPTIMAL_KHR is
// treated as success as well), otherwise the error returned by tryRender
// or vkQueuePresentKHR.
VkResult Gui::renderFrame(FrameInfo& info) {
	ZoneScoped;
	FrameMark;

	makeImGuiCurrent();
	Draw* foundDraw {};

	// find a free draw object
	{
		std::lock_guard devMutex(dev().mutex);

		for(auto& draw : draws_) {
			if(!draw.inUse) {
				foundDraw = &draw;
				continue;
			}

			// draws whose fence is signaled are finished and can be reused
			if(dev().dispatch.GetFenceStatus(dev().handle, draw.fence) == VK_SUCCESS) {
				finishedLocked(draw);
				foundDraw = &draw;
			}
		}

		// all draws are still in flight, create a new one
		if(!foundDraw) {
			foundDraw = &draws_.emplace_back();
			foundDraw->init(*this, commandPool_);
		}
	}

	auto& draw = *foundDraw;
	draw.usedHandles.clear();
	foundDraw->lastUsed = ++drawCounter_;

	// (re-)create the blur resources when rendering to a new swapchain
	if(blur_.dev) {
		auto& sc = dev().swapchains.get(info.swapchain);
		if(sc.supportsSampling) {
			if(blurSwapchain_ != info.swapchain) {
				// TODO: would have to wait on previous draw here.
				// Not sure if realistic, old swapchain was probably destroyed
				// when we land here?
				vil::resize(blur_, sc.ci.imageExtent, sc.handle, sc.ci.imageFormat);
				blurSwapchain_ = info.swapchain;

				// point the blur descriptor set to the new blur image view
				VkDescriptorImageInfo imgInfo {};
				imgInfo.imageView = blur_.view0;
				imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				imgInfo.sampler = dev().linearSampler;

				VkWriteDescriptorSet write {};
				write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
				write.pImageInfo = &imgInfo;
				write.descriptorCount = 1u;
				write.dstArrayElement = 0u;
				write.dstSet = blurDs_;
				write.dstBinding = 0u;
				write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;

				dev().dispatch.UpdateDescriptorSets(dev().handle, 1u, &write, 0u, nullptr);
			}
		}
	}

	ImGui::GetIO().DisplaySize.x = info.extent.width;
	ImGui::GetIO().DisplaySize.y = info.extent.height;

	// compute the time since the last frame, in seconds
	using Secf = std::chrono::duration<float, std::ratio<1, 1>>;
	auto now = Clock::now();
	auto diff = now - lastFrame_;
	lastFrame_ = now;
	dt_ = std::chrono::duration_cast<Secf>(diff).count();
	// only forward strictly positive delta times to ImGui
	if(dt_ > 0.f) {
		ImGui::GetIO().DeltaTime = dt_;
	}

	// tryRender returns VK_INCOMPLETE when a used handle was invalidated
	// while recording; in that case we just record the frame again.
	VkResult res = VK_INCOMPLETE;
	while(true) {
		res = tryRender(draw, info);
		if(res == VK_SUCCESS) {
			break;
		}

		// error case
		if(res != VK_INCOMPLETE) {
			return res;
		}

		dlg_info("re-trying rendering after mid-draw invalidation");
	}

	// call down
	VkPresentInfoKHR presentInfo {};
	presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
	presentInfo.pImageIndices = &info.imageIdx;
	presentInfo.pWaitSemaphores = &foundDraw->presentSemaphore;
	presentInfo.waitSemaphoreCount = 1u;
	presentInfo.pSwapchains = &info.swapchain;
	presentInfo.swapchainCount = 1u;
	// TODO: forward pNext for all extensions we know. Really important
	// here, might contain important information.
	// Maybe just forward everything? Warn for unknown types?

	{
		ZoneScopedN("dispatch.QueuePresent");

		std::lock_guard queueLock(dev().queueMutex);
		res = dev().dispatch.QueuePresentKHR(info.presentQueue, &presentInfo);
	}

	if(res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) {
		dlg_error("vkQueuePresentKHR error: {}", vk::name(res));

		// Presenting failed but the draw was already submitted: wait for
		// it to complete so it can be marked as finished right away.
		// TODO: not sure how to handle this the best
		VK_CHECK(dev().dispatch.WaitForFences(dev().handle, 1,
			&foundDraw->fence, true, UINT64_MAX));

		std::lock_guard lock(dev().mutex);
		finishedLocked(*foundDraw);

		return res;
	}

	return VK_SUCCESS;
}

// Builds the semaphore synchronization of the gui submission with pending
// application submissions on *other* queues, only for those submissions
// that needsSyncLocked reports for the given draw.
// Extends waitSemaphores_/waitStages_/signalSemaphores_ (which must
// already contain the caller-provided entries) and writes the resulting
// arrays into submitInfo. With timeline semaphores, tsInfo_ is chained
// into submitInfo.pNext; otherwise binary semaphores from the device's
// pool are used and remembered in draw.waitedUpon.
// Called from tryRender while the device mutex is held.
// Returns VK_SUCCESS, or the error of submitSemaphore when inserting a
// semaphore signal into an application queue failed.
VkResult Gui::addLegacySync(Draw& draw, VkSubmitInfo& submitInfo) {
	std::vector<Submission*> waitSubmissions;
	for(auto& pending : reversed(dev().pending)) {
		// When the pending submission was submitted to the guiQueue
		// we don't need to sync via semaphore, the pipeline barrier is enough
		if(pending->queue == dev().gfxQueue) {
			continue;
		}

		// We only need to chain the last submission on each
		// queue as semaphores guarantee that everything before
		// has finished. Since dev().pending is ordered (by submission
		// order) and we iterate it in reverse, we just take the first
		// submission we encounter per queue.
		auto found = false;
		for(auto& sub : waitSubmissions) {
			if(sub->parent->queue == pending->queue) {
				dlg_assert(sub->parent->globalSubmitID > pending->globalSubmitID);
				found = true;
				break;
			}
		}

		if(found) {
			continue;
		}

		auto subs = needsSyncLocked(*pending, draw);
		if(!subs.empty()) {
			// we only need the last submission from the batch
			// as semaphores guarantee everything before in submission
			// order has finished.
			waitSubmissions.push_back(const_cast<Submission*>(subs.back()));
		}
	}

	if(dev().timelineSemaphores) {
		dlg_assert(dev().resetSemaphores.empty());

		tsInfo_.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
		tsInfo_.pNext = submitInfo.pNext;

		// wait
		waitValues_.resize(waitSemaphores_.size()); // initial ones, ignored
		for(auto* sub : waitSubmissions) {
			// PERF: guess we could do better
			waitStages_.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
			waitSemaphores_.push_back(sub->parent->queue->submissionSemaphore);
			waitValues_.push_back(sub->queueSubmitID);
		}

		dlg_assert(waitValues_.size() == waitSemaphores_.size());
		tsInfo_.waitSemaphoreValueCount = u32(waitValues_.size());
		tsInfo_.pWaitSemaphoreValues = waitValues_.data();

		// signal
		signalSemaphores_.push_back(usedQueue().submissionSemaphore);
		draw.lastSubmissionID = ++usedQueue().submissionCounter;

		// signalValues[0] is not set by design, should be
		// ignored by driver as signalSemaphores[0] is binary, we need
		// that since present semaphores cannot be timelined
		signalValues_.resize(2);
		signalValues_[1] = draw.lastSubmissionID;

		tsInfo_.signalSemaphoreValueCount = u32(signalValues_.size());
		tsInfo_.pSignalSemaphoreValues = signalValues_.data();

		submitInfo.pNext = &tsInfo_;
	} else {
		// add dev.resetSemaphores while we are at it
		for(auto sem : dev().resetSemaphores) {
			waitSemaphores_.push_back(sem);
			waitStages_.push_back(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
			draw.waitedUpon.push_back(sem);
		}

		dev().resetSemaphores.clear();

		// When this draw's futureSemaphore wasn't used, make sure
		// to reset it before we signal it again. This shouldn't actually
		// cause an additional wait, we already wait for the last draw.
		if(!draw.futureSemaphoreUsed && draw.futureSemaphoreSignaled) {
			// This is a bit messy.
			// So, 'futureSemaphore' was signaled by us but never consumed
			// (happens when we just didn't access anything the
			// application used since then). We have to reset the
			// semaphore.
			// Initially, we simply waited on it via
			// submitInfo.pWaitSemaphores but no one knows if this
			// is valid or not, the spec is too vague.
			// Since this lead to a deadlock with anv, mesa 20.3, 21.1,
			// we just swap it out with a pool semaphore now.
			// We can't recreate it without waiting on draw.fence
			// which we want to avoid.
			dev().resetSemaphores.push_back(draw.futureSemaphore);
			draw.futureSemaphore = getSemaphoreFromPoolLocked(dev());

			draw.futureSemaphoreUsed = false;
			draw.futureSemaphoreSignaled = false;
		}

		signalSemaphores_.push_back(draw.futureSemaphore);

		for(auto* sub : waitSubmissions) {
			// take ownership of sub->ourSemaphore if it's valid
			VkSemaphore sem = sub->ourSemaphore;
			sub->ourSemaphore = {};

			// if sub->ourSemaphore has already been used before, we have
			// to synchronize with the queue via a new semaphore.
			if(!sem) {
				sem = getSemaphoreFromPoolLocked(dev());
				auto res = submitSemaphore(*sub->parent->queue, sem);
				if(res != VK_SUCCESS) {
					dlg_error("vkQueueSubmit error: {}", vk::name(res));

					// The semaphore we just took from the pool is not
					// referenced by anything we keep track of; hand it
					// back instead of leaking it.
					dev().semaphorePool.push_back(sem);
					return res;
				}
			}

			// PERF: guess we could do better
			waitStages_.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
			waitSemaphores_.push_back(sem);
			draw.waitedUpon.push_back(sem);
		}
	}

	dlg_assert(waitStages_.size() == waitSemaphores_.size());

	submitInfo.signalSemaphoreCount = u32(signalSemaphores_.size());
	submitInfo.pSignalSemaphores = signalSemaphores_.data();
	submitInfo.pWaitDstStageMask = waitStages_.data();
	submitInfo.waitSemaphoreCount = u32(waitSemaphores_.size());
	submitInfo.pWaitSemaphores = waitSemaphores_.data();

	return VK_SUCCESS;
}

// Builds the conservative ("full sync") synchronization for the gui
// submission: when the draw used any application handle, the submission
// waits for *all* pending submissions on every other queue via their
// timeline submission semaphores.
// Requires timeline semaphore support (asserted). Extends
// waitSemaphores_/waitStages_/waitValues_/signalSemaphores_, chains
// tsInfo_ into submitInfo.pNext and writes the arrays into submitInfo.
void Gui::addFullSync(Draw& draw, VkSubmitInfo& submitInfo) {
	dlg_assert(dev().timelineSemaphores);

	// tsInfo_
	tsInfo_.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
	tsInfo_.pNext = submitInfo.pNext;

	// wait
	waitValues_.resize(waitSemaphores_.size()); // initial ones, ignored

	// when we used any application resources we sync with *all*
	// pending submissions.
	if(!draw.usedHandles.empty()) {
		for(auto& pqueue : dev().queues) {
			auto& queue = *pqueue;
			// submissions on our own queue are ordered by the barriers
			if(&queue == &usedQueue()) {
				continue;
			}

			// Zero-initialized so that a failing query results in an
			// (over-conservative) wait below instead of reading an
			// indeterminate value.
			u64 finishedID {};
			VK_CHECK(dev().dispatch.GetSemaphoreCounterValue(dev().handle,
				queue.submissionSemaphore, &finishedID));

			// no pending submissions on this queue
			if(finishedID == queue.submissionCounter) {
				continue;
			}

			dlg_assert(finishedID < queue.submissionCounter);
			waitValues_.push_back(queue.submissionCounter);
			waitSemaphores_.push_back(queue.submissionSemaphore);
			waitStages_.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
		}
	}

	dlg_assert(waitValues_.size() == waitSemaphores_.size());
	tsInfo_.waitSemaphoreValueCount = u32(waitValues_.size());
	tsInfo_.pWaitSemaphoreValues = waitValues_.data();

	// signal
	signalSemaphores_.push_back(usedQueue().submissionSemaphore);
	draw.lastSubmissionID = ++usedQueue().submissionCounter;

	// signalValues[0] is not set by design, should be
	// ignored by driver as signalSemaphores[0] is binary, we need
	// that since present semaphores cannot be timelined
	signalValues_.resize(2);
	signalValues_[1] = draw.lastSubmissionID;

	tsInfo_.signalSemaphoreValueCount = u32(signalValues_.size());
	tsInfo_.pSignalSemaphoreValues = signalValues_.data();

	submitInfo.pNext = &tsInfo_;

	// build submitInfo
	dlg_assert(waitStages_.size() == waitSemaphores_.size());

	submitInfo.signalSemaphoreCount = u32(signalSemaphores_.size());
	submitInfo.pSignalSemaphores = signalSemaphores_.data();
	submitInfo.pWaitDstStageMask = waitStages_.data();
	submitInfo.waitSemaphoreCount = u32(waitSemaphores_.size());
	submitInfo.pWaitSemaphores = waitSemaphores_.data();
}

void Gui::waitForDraws() {
	std::vector<VkFence> fences;
	for(auto& draw : draws_) {
		if(draw.inUse) {
			fences.push_back(draw.fence);
		}
	}

	if(!fences.empty()) {
		VK_CHECK(dev().dispatch.WaitForFences(dev().handle,
			u32(fences.size()), fences.data(), true, UINT64_MAX));
		// we can't reset the draws here
	}
}

// Makes the ImGui context of this gui (imgui_) the current ImGui context,
// so that subsequent ImGui:: calls operate on it.
void Gui::makeImGuiCurrent() {
	ImGui::SetCurrentContext(imgui_);
}

// Selects the given handle in the resources tab and sets the tab's
// filter to the handle's object type.
// When 'activateTab' is true, the resources tab is additionally
// requested to become the active tab.
void Gui::selectResource(Handle& handle, bool activateTab) {
	auto& resourcesTab = *tabs_.resources;
	resourcesTab.select(handle);
	resourcesTab.filter_ = handle.objectType;

	if(!activateTab) {
		return;
	}

	this->activateTab(Tab::resources);
}

// Returns the most recently used draw (highest 'lastUsed') that is still
// pending on the device (in use, fence not yet signaled) and for which
// needsSyncLocked reports a non-empty result regarding the given batch.
// Returns nullptr when there is no such draw.
Draw* Gui::latestPendingDrawSyncLocked(SubmissionBatch& batch) {
	Draw* latest = nullptr;
	for(auto& candidate : draws_) {
		if(!candidate.inUse) {
			continue;
		}

		// already finished on the device, not pending anymore
		const auto status = dev().dispatch.GetFenceStatus(dev().handle, candidate.fence);
		if(status == VK_SUCCESS) {
			continue;
		}

		// only draws newer than the current result are of interest,
		// this check is done before the sync evaluation
		const bool newer = !latest || candidate.lastUsed > latest->lastUsed;
		if(!newer) {
			continue;
		}

		if(!needsSyncLocked(batch, candidate).empty()) {
			latest = &candidate;
		}
	}

	return latest;
}

// Marks an in-use draw whose fence is signaled (both asserted) as
// finished: returns the semaphores it waited upon to the device's
// semaphore pool, invokes its onFinish callbacks with 'true', clears its
// per-submission state, resets its fence and marks it as unused.
void Gui::finishedLocked(Draw& draw) {
	dlg_assert(draw.inUse);
	dlg_assert(dev().dispatch.GetFenceStatus(dev().handle, draw.fence) == VK_SUCCESS);

	// the waited-upon semaphores can be used again
	auto& pool = dev().semaphorePool;
	for(auto sem : draw.waitedUpon) {
		pool.push_back(sem);
	}

	// notify everyone interested that the draw completed
	for(auto& onFinished : draw.onFinish) {
		onFinished(draw, true);
	}

	// reset per-submission state
	draw.onFinish.clear();
	draw.waitedUpon.clear();
	draw.usedHandles.clear();
	draw.usedHookState.reset();

	VK_CHECK(dev().dispatch.ResetFences(dev().handle, 1, &draw.fence));

	draw.inUse = false;
}

// util
// Draws a button labeled with the handle's name (including its type).
// Clicking the button selects the handle in the gui.
void refButton(Gui& gui, Handle& handle) {
	// The handle's address is pushed as ImGui id since there may be
	// multiple ref buttons with the same label (e.g. for unnamed handles).
	ImGui::PushID(&handle);

	const auto label = name(handle, true);
	const bool clicked = ImGui::Button(label.c_str());
	if(clicked) {
		gui.selectResource(handle);
	}

	ImGui::PopID();
}

// Like refButton, but draws nothing when 'handle' is null.
void refButtonOpt(Gui& gui, Handle* handle) {
	if(!handle) {
		return;
	}

	refButton(gui, *handle);
}

// Like refButton, but takes a pointer that is expected to be non-null.
// NOTE(review): dlg_assert_or presumably reports the failed expectation
// and then executes the given 'return' when handle is null - confirm
// against the dlg macro definition.
void refButtonExpect(Gui& gui, Handle* handle) {
	dlg_assert_or(handle, return);
	refButton(gui, *handle);
}

// Draws a ref button for 'handle' when it is non-null. Otherwise draws
// a disabled, semi-transparent button labeled 'str' as placeholder.
void refButtonD(Gui& gui, Handle* handle, const char* str) {
	if(handle) {
		refButton(gui, *handle);
		return;
	}

	// no handle: show a greyed-out button that can't be interacted with
	ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true);
	ImGui::PushStyleVar(ImGuiStyleVar_Alpha, 0.6f);

	// NOTE: could add popup to button further explaining what's going on
	ImGui::Button(str);

	ImGui::PopStyleVar();
	ImGui::PopItemFlag();
}

// Registers a recorder that tryRender invokes once with the current draw
// before the gui render pass is begun. The list of recorders is cleared
// after they were invoked.
void Gui::addPreRender(Recorder rec) {
	preRender_.emplace_back(std::move(rec));
}

// Registers a recorder that tryRender invokes once with the current draw
// after the gui render pass was ended. The list of recorders is cleared
// after they were invoked.
void Gui::addPostRender(Recorder rec) {
	postRender_.emplace_back(std::move(rec));
}

// Returns the queue the gui submits its rendering to: the device's
// gfxQueue.
Queue& Gui::usedQueue() const {
	auto& device = dev();
	return *device.gfxQueue;
}

// Updates the visibility state of the gui.
// When the gui gets hidden, the command hook is frozen.
void Gui::visible(bool newVisible) {
	visible_ = newVisible;
	if(newVisible) {
		return;
	}

	// hidden: freeze the command hook
	dev().commandHook->freeze.store(true);
}

} // namespace vil
