kiba-engine
device.c
#include <kiba/containers/array.h>
#include <kiba/core/memory.h>
#include <kiba/core/string.h>
#include <kiba/gpu/vulkan/allocator.h>
#include <kiba/gpu/vulkan/conv.h>
#include <kiba/gpu/vulkan/device.h>
#include <kiba/gpu/vulkan/instance.h>
#include <kiba/gpu/vulkan/queue.h>
#include <kiba/gpu/vulkan/util.h>

// FIXME ideally the memory header should include a memcmp equivalent
#include <string.h>

// TODO ideally we'd probably want to get all devices as part of the vulkan initialization
// then just hand out the devices based on requirements. that should also simplify queue sharing

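// requirements a candidate GPU has to satisfy before the backend accepts it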
struct vk_device_requirements {
    b8 graphics;
    b8 transfer;
    b8 present;
    b8 compute;
    b8 discrete_gpu;
    VkPhysicalDeviceFeatures features;
    array_of(const char *) extensions;
};

b8 vk_select_physical_device(struct gpu_backend_device *device, struct vk_device_requirements requirements);
b8 vk_select_logical_device(struct gpu_backend_device *device, struct vk_device_requirements requirements);

b8 vk_physical_device_meets_requirements(struct gpu_backend_device *device,
                                         VkPhysicalDevice phys_device,
                                         VkPhysicalDeviceFeatures *features,
                                         VkPhysicalDeviceProperties *props,
                                         struct vk_device_requirements requirements);

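// creates the vulkan backend device: builds a default set of requirements, picks a matching
// physical device, creates the logical device and sets up the framebuffer/render pass caches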
b8 gpu_backend_device_create(struct gpu_backend_device *device) {
    // TODO temp default config
    array_of(const char *) device_extensions = array_create(const char *, 4, &vk_alloc.kiba_alloc);
    if (device_extensions == KB_NULL) {
        KB_ERROR("could not create array for device extensions");
        return false;
    }
    array_push(device_extensions, VK_KHR_SWAPCHAIN_EXTENSION_NAME);
    array_push(device_extensions, VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME);
    VkPhysicalDeviceFeatures features = {
        .samplerAnisotropy = VK_TRUE,
        .fillModeNonSolid = VK_TRUE,
    };
    struct vk_device_requirements requirements = {
        .graphics = true,
        .transfer = true,
        .present = true,
        .compute = true,
        .discrete_gpu = true,
        .extensions = device_extensions,
        .features = features,
    };

    if (!vk_select_physical_device(device, requirements)) {
        KB_ERROR("failed to find GPU meeting the requirements");
        return false;
    }
    if (!vk_select_logical_device(device, requirements)) {
        KB_ERROR("failed to create logical device");
        return false;
    }
    device->framebuffers = array_create(struct vk_framebuffer, 8, &vk_alloc.kiba_alloc);
    if (device->framebuffers == KB_NULL) {
        KB_ERROR("cannot reserve memory for framebuffer storage");
        return false;
    }
    device->renderpasses = array_create(struct vk_renderpass, 8, &vk_alloc.kiba_alloc);
    if (device->renderpasses == KB_NULL) {
        KB_ERROR("cannot reserve memory for renderpass storage");
        return false;
    }

    return true;
}

void gpu_backend_device_finish_running_tasks(struct gpu_backend_device *device) { vkDeviceWaitIdle(device->logical); }

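// tears down all cached framebuffers and render passes, the queues and finally the logical device itself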
void gpu_backend_device_destroy(struct gpu_backend_device *device) {
    vk_queue_destroy(device->logical, &device->graphics_queue);
    vk_queue_destroy(device->logical, &device->transfer_queue);
    vk_queue_destroy(device->logical, &device->present_queue);
    vk_queue_destroy(device->logical, &device->compute_queue);
    array_for_each(struct vk_framebuffer, fb, device->framebuffers) {
        vkDestroyFramebuffer(device->logical, fb->fb, &vk_alloc.vulkan_callbacks);
    }
    array_destroy(&device->framebuffers);
    array_for_each(struct vk_renderpass, rp, device->renderpasses) {
        vkDestroyRenderPass(device->logical, rp->rp, &vk_alloc.vulkan_callbacks);
    }
    array_destroy(&device->renderpasses);
    if (device->logical != VK_NULL_HANDLE) {
        vkDestroyDevice(device->logical, &vk_alloc.vulkan_callbacks);
    }
    memory_zero(device, sizeof(struct gpu_backend_device));
}

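// enumerates all vulkan-capable physical devices and selects the first one that meets the requirements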
b8 vk_select_physical_device(struct gpu_backend_device *device, struct vk_device_requirements requirements) {
    u32 physical_device_count = 0;
    VK_CALL_B8(vkEnumeratePhysicalDevices(vk_instance.raw, &physical_device_count, 0));
    if (physical_device_count == 0) {
        KB_INFO("found no device that supports vulkan");
        return false;
    }

    array_of(VkPhysicalDevice) physical_devices =
        array_create(VkPhysicalDevice, physical_device_count, &vk_alloc.kiba_alloc);
    if (physical_devices == KB_NULL) {
        KB_ERROR("unable to allocate enough memory for list of physical devices");
        return false;
    }
    array_resize(&physical_devices, physical_device_count);
    VK_CALL_B8(vkEnumeratePhysicalDevices(vk_instance.raw, &physical_device_count, physical_devices));

    b8 ret = false;
    array_for_each(const VkPhysicalDevice, physical_device, physical_devices) {
        VkPhysicalDeviceProperties physical_device_properties;
        VkPhysicalDeviceFeatures physical_device_features;
        vkGetPhysicalDeviceProperties(*physical_device, &physical_device_properties);
        vkGetPhysicalDeviceFeatures(*physical_device, &physical_device_features);
        KB_INFO("checking device {raw_string} against requirements", physical_device_properties.deviceName);
        if (vk_physical_device_meets_requirements(device,
                                                  *physical_device,
                                                  &physical_device_features,
                                                  &physical_device_properties,
                                                  requirements)) {
            KB_INFO("device {raw_string} meets requirements", physical_device_properties.deviceName);
            device->physical = *physical_device;
            ret = true;
            break;
        }
        KB_INFO("skipping physical device {raw_string} because it does not match the requirements",
                physical_device_properties.deviceName);
    }
    array_destroy(&physical_devices);
    return ret;
}

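// creates the VkDevice with the requested extensions and features and initializes the backend queues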
b8 vk_select_logical_device(struct gpu_backend_device *device, struct vk_device_requirements requirements) {
    const usize max_different_queue = 8;

    u32 indices[max_different_queue];
    u32 index = 0;
    indices[index++] = device->graphics_queue.index;
    if (device->graphics_queue.index != device->present_queue.index) {
        indices[index++] = device->present_queue.index;
    }
    if (device->graphics_queue.index != device->transfer_queue.index) {
        indices[index++] = device->transfer_queue.index;
    }

    f32 high_prio = 1.0f;
    VkDeviceQueueCreateInfo queue_create_infos[max_different_queue];
    for (u32 i = 0; i < index; ++i) {
        queue_create_infos[i].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queue_create_infos[i].queueFamilyIndex = indices[i];
        // TODO: should be configurable how many
        queue_create_infos[i].queueCount = 1;
        queue_create_infos[i].flags = 0;
        queue_create_infos[i].pNext = 0;
        queue_create_infos[i].pQueuePriorities = &high_prio;
    }

    VkDeviceCreateInfo device_create_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pQueueCreateInfos = queue_create_infos,
        .queueCreateInfoCount = index,
        .pEnabledFeatures = &requirements.features,
        .ppEnabledExtensionNames = requirements.extensions,
        .enabledExtensionCount = (u32) array_size(requirements.extensions),
    };
    VK_CALL_B8(vkCreateDevice(device->physical, &device_create_info, &vk_alloc.vulkan_callbacks, &device->logical));

    if (!vk_queue_create(device->logical, &device->graphics_queue)) {
        KB_ERROR("failed to initialize graphics queue");
        device->graphics_queue.available = false;
        return false;
    }
    if (!vk_queue_create(device->logical, &device->transfer_queue)) {
        KB_ERROR("failed to initialize transfer queue");
        device->transfer_queue.available = false;
        return false;
    }
    if (!vk_queue_create(device->logical, &device->present_queue)) {
        KB_ERROR("failed to initialize present queue");
        device->present_queue.available = false;
        return false;
    }
    if (!vk_queue_create(device->logical, &device->compute_queue)) {
        KB_ERROR("failed to initialize compute queue");
        device->compute_queue.available = false;
        return false;
    }
    return true;
}

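// checks a single physical device against the requirements and records the discovered queue family indices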
b8 vk_physical_device_meets_requirements(struct gpu_backend_device *device,
                                         VkPhysicalDevice phys_device,
                                         VkPhysicalDeviceFeatures *features,
                                         VkPhysicalDeviceProperties *props,
                                         struct vk_device_requirements requirements) {
    UNUSED(features);
    // discrete gpu requirement
    if (requirements.discrete_gpu && props->deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) {
        KB_INFO("device is not viable as it's not a discrete GPU");
        return false;
    }

    // queue families
    struct vk_queue graphics_queue = {0};
    struct vk_queue transfer_queue = {0};
    struct vk_queue present_queue = {0};
    struct vk_queue compute_queue = {0};

    u32 queue_family_count = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(phys_device, &queue_family_count, 0);

    array_of(VkQueueFamilyProperties) queue_props =
        array_create(VkQueueFamilyProperties, queue_family_count, &vk_alloc.kiba_alloc);
    if (queue_props == KB_NULL) {
        KB_INFO("unable to allocate enough memory for list of queue properties");
        return false;
    }
    array_resize(&queue_props, queue_family_count);
    vkGetPhysicalDeviceQueueFamilyProperties(phys_device, &queue_family_count, queue_props);

    for (u32 i = 0; i < queue_family_count; ++i) {
        if (queue_props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT && !graphics_queue.available) {
            graphics_queue.available = true;
            graphics_queue.index = i;
            VkBool32 present_support = true; // false;
            // TODO reintroduce this somehow? maybe it's not even needed
            // VK_CALL_B8(vkGetPhysicalDeviceSurfaceSupportKHR(device, i, context->surface, &present_support));
            if (present_support) {
                present_queue.available = true;
                present_queue.index = i;
            }
        }
        if (queue_props[i].queueFlags & VK_QUEUE_TRANSFER_BIT) {
            transfer_queue.available = true;
            transfer_queue.index = i;
        }
        if (queue_props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
            compute_queue.available = true;
            compute_queue.index = i;
        }
    }
    array_destroy(&queue_props);
    if (requirements.graphics && !graphics_queue.available) {
        KB_INFO("device does not support required graphics queue");
        return false;
    }
    if (requirements.transfer && !transfer_queue.available) {
        KB_INFO("device does not support required transfer queue");
        return false;
    }
    if (requirements.present && !present_queue.available) {
        KB_INFO("device does not support required present queue");
        return false;
    }
    if (requirements.compute && !compute_queue.available) {
        KB_INFO("device does not support required compute queue");
        return false;
    }

    // device extensions
    u32 extension_count = 0;
    vkEnumerateDeviceExtensionProperties(phys_device, 0, &extension_count, 0);

    array_of(VkExtensionProperties) extensions =
        array_create(VkExtensionProperties, extension_count, &vk_alloc.kiba_alloc);
    if (extensions == KB_NULL) {
        KB_INFO("unable to allocate enough memory for list of extensions");
        return false;
    }
    array_resize(&extensions, extension_count);
    vkEnumerateDeviceExtensionProperties(phys_device, 0, &extension_count, extensions);

    u32 required_extension_count = (u32) array_size(requirements.extensions);
    b8 has_extensions = true;
    if (extension_count >= required_extension_count) {
        array_for_each(const char *, extension, requirements.extensions) {
            b8 found = false;
            for (u32 j = 0; j < extension_count; ++j) {
                if (string_equal(string_from_raw(*extension), string_from_raw(extensions[j].extensionName))) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                KB_INFO("required extension {raw_string} not supported by device", *extension);
                has_extensions = false;
            }
        }
    } else {
        KB_INFO("device cannot support all required extensions");
        has_extensions = false;
    }

    array_destroy(&extensions);

    device->graphics_queue = graphics_queue;
    device->transfer_queue = transfer_queue;
    device->present_queue = present_queue;
    device->compute_queue = compute_queue;

    return has_extensions;
}

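// finds a memory type index that matches the type filter and has all requested property flags set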
static u32 vk_device_get_memory_type(struct gpu_backend_device device,
                                     u32 type_filter,
                                     VkMemoryPropertyFlagBits flags) {
    VkPhysicalDeviceMemoryProperties memory_properties;
    vkGetPhysicalDeviceMemoryProperties(device.physical, &memory_properties);
    for (u32 i = 0; i < memory_properties.memoryTypeCount; ++i) {
        if (type_filter & KB_UBIT(i) && KB_FLAGS_ALL_SET(memory_properties.memoryTypes[i].propertyFlags, flags)) {
            return i;
        }
    }
    return U32_MAX;
}

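// allocates device memory that satisfies the given requirements and memory property flags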
b8 vk_device_allocate_memory(struct gpu_backend_device device,
                             VkDeviceMemory *memory,
                             VkMemoryRequirements requirements,
                             VkMemoryPropertyFlagBits properties) {
    u32 memory_type_index = vk_device_get_memory_type(device, requirements.memoryTypeBits, properties);
    KB_ASSERT(memory_type_index != U32_MAX,
              "memory type must be available"); // TODO should have fallback handling
    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .allocationSize = requirements.size,
        .memoryTypeIndex = memory_type_index,
    };
    VK_CALL_B8(vkAllocateMemory(device.logical, &alloc_info, &vk_alloc.vulkan_callbacks, memory));
    return true;
}

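// returns a cached framebuffer for the given key or creates (and caches) a new one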
b8 vk_device_create_framebuffer(struct gpu_backend_device *device,
                                struct vk_framebuffer_key key,
                                VkRenderPass renderpass,
                                VkFramebuffer *out_framebuffer) {
    array_for_each(struct vk_framebuffer, fb, device->framebuffers) {
        if (memcmp(&fb->key, &key, sizeof(struct vk_framebuffer_key)) == 0) {
            *out_framebuffer = fb->fb;
            KB_DEBUG("serving framebuffer from cache");
            return true;
        }
    }
    KB_DEBUG("creating framebuffer");
    VkImageView attachments[KB_GPU_MAX_ATTACHMENTS] = {0};
    for (u32 i = 0; i < key.attachment_count; ++i) {
        attachments[i] = key.attachments[i].view;
    }
    VkFramebufferCreateInfo framebuffer_info = {
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .renderPass = renderpass,
        .attachmentCount = key.attachment_count,
        .pAttachments = attachments,
        .width = key.extent.width,
        .height = key.extent.height,
        .layers = key.extent.depth ? key.extent.depth : 1, // TODO keep the safeguard against zero?
    };
    VK_CALL_B8(vkCreateFramebuffer(device->logical, &framebuffer_info, &vk_alloc.vulkan_callbacks, out_framebuffer));
    struct vk_framebuffer new_fb = {
        .key = key,
        .fb = *out_framebuffer,
    };
    return array_push_checked(&device->framebuffers, &new_fb);
}

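// returns a cached render pass for the given key or builds (and caches) a new single-subpass render pass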
b8 vk_device_create_renderpass(struct gpu_backend_device *device,
                               struct vk_renderpass_key key,
                               VkRenderPass *out_renderpass) {
    array_for_each(struct vk_renderpass, rp, device->renderpasses) {
        if (memcmp(&rp->key, &key, sizeof(struct vk_renderpass_key)) == 0) {
            *out_renderpass = rp->rp;
            KB_DEBUG("serving render pass from cache");
            return true;
        }
    }
    KB_DEBUG("creating render pass");
    u32 attachment_descr_count = 0;
    VkAttachmentDescription attachment_descr[KB_GPU_MAX_ATTACHMENTS] = {0};
    u32 color_ref_count = 0;
    VkAttachmentReference color_refs[KB_GPU_MAX_COLOR_ATTACHMENTS] = {0};
    u32 resolve_ref_count = 0;
    VkAttachmentReference resolve_refs[KB_GPU_MAX_COLOR_ATTACHMENTS] = {0};

    VkAttachmentReference unused_ref = {
        .attachment = VK_ATTACHMENT_UNUSED,
        .layout = VK_IMAGE_LAYOUT_UNDEFINED,
    };
    for (u32 ca = 0; ca < key.color_count; ++ca) {
        const struct vk_color_attachment_key color_attachment = key.colors[ca];
        VkAttachmentReference color_ref = {
            .attachment = attachment_descr_count,
            .layout = color_attachment.base.layout,
        };
        color_refs[color_ref_count++] = color_ref;
        VkAttachmentDescription color_descr = {
            .format = color_attachment.base.format,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .loadOp = vk_convert_attachment_load_op(color_attachment.base.ops),
            .storeOp = vk_convert_attachment_store_op(color_attachment.base.ops),
            .initialLayout = color_attachment.base.layout,
            .finalLayout = color_attachment.base.layout,
        };
        attachment_descr[attachment_descr_count++] = color_descr;

        VkAttachmentReference resolve_ref = unused_ref;
        if (color_attachment.resolve_set) {
            resolve_ref = (VkAttachmentReference){
                .attachment = attachment_descr_count,
                .layout = color_attachment.resolve.layout,
            };
            VkAttachmentDescription resolve_descr = {
                .format = color_attachment.resolve.format,
                .samples = VK_SAMPLE_COUNT_1_BIT,
                .loadOp = vk_convert_attachment_load_op(color_attachment.resolve.ops),
                .storeOp = vk_convert_attachment_store_op(color_attachment.resolve.ops),
                .initialLayout = color_attachment.resolve.layout,
                .finalLayout = color_attachment.resolve.layout,
            };
            attachment_descr[attachment_descr_count++] = resolve_descr;
        }
        resolve_refs[resolve_ref_count++] = resolve_ref;
    }

    VkSubpassDescription subpass = {
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
        .colorAttachmentCount = color_ref_count,
        .pColorAttachments = color_refs,
        .pResolveAttachments = resolve_refs,
    };

    VkAttachmentReference depth_ref;
    b8 set_depth_ref = false;
    if (key.depth_stencil_set) {
        set_depth_ref = true;
        depth_ref = (VkAttachmentReference){
            .attachment = attachment_descr_count,
            .layout = key.depth_stencil.base.layout,
        };
        VkAttachmentDescription depth_descr = {
            .format = key.depth_stencil.base.format,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .loadOp = vk_convert_attachment_load_op(key.depth_stencil.base.ops),
            .storeOp = vk_convert_attachment_store_op(key.depth_stencil.base.ops),
            .stencilLoadOp = vk_convert_attachment_load_op(key.depth_stencil.stencil_ops),
            .stencilStoreOp = vk_convert_attachment_store_op(key.depth_stencil.stencil_ops),
            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, // TODO key.depth_stencil.base.layout,
            .finalLayout = key.depth_stencil.base.layout,
        };
        attachment_descr[attachment_descr_count++] = depth_descr;
    }
    subpass.pDepthStencilAttachment = set_depth_ref ? &depth_ref : NULL;

    VkSubpassDependency subpass_dependency = {
        // TODO is this still needed? -> technically not but may be useful in other use cases to automatically
        // transition swapchain images
        .srcSubpass = VK_SUBPASS_EXTERNAL,
        .dstSubpass = 0,
        .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
        .srcAccessMask = 0,
        .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
        .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
    };

    VkRenderPassCreateInfo renderpass_info = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        .attachmentCount = attachment_descr_count,
        .pAttachments = attachment_descr,
        .subpassCount = 1,
        .pSubpasses = &subpass,
        .dependencyCount = 1,
        .pDependencies = &subpass_dependency,
    };

    VK_CALL_B8(vkCreateRenderPass(device->logical, &renderpass_info, &vk_alloc.vulkan_callbacks, out_renderpass));
    struct vk_renderpass new_rp = {
        .key = key,
        .rp = *out_renderpass,
    };
    return array_push_checked(&device->renderpasses, &new_rp);
}

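// attaches a debug name to a vulkan object in debug builds (no-op otherwise)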
void vk_device_set_object_name(VkDevice device, VkObjectType type, void *handle, const char *name) {
#ifdef KB_DEBUG_BUILD
    VkDebugUtilsObjectNameInfoEXT info = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
        .objectType = type,
        .objectHandle = (u64) handle,
        .pObjectName = name,
    };
    VkResult res = vk_instance.setDebugUtilsObjectName(device, &info);
    if (res != VK_SUCCESS) {
        KB_WARN("error setting object name {raw_string} for object {pointer} of type {u32}: {u32}",
                name,
                handle,
                type,
                res);
    }
#else
    UNUSED(device);
    UNUSED(type);
    UNUSED(handle);
    UNUSED(name);
#endif
}