Line data Source code
1 : /**
2 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
3 :
4 : Permission is hereby granted, free of charge, to any person obtaining a copy
5 : of this software and associated documentation files (the "Software"), to deal
6 : in the Software without restriction, including without limitation the rights
7 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 : copies of the Software, and to permit persons to whom the Software is
9 : furnished to do so, subject to the following conditions:
10 :
11 : The above copyright notice and this permission notice shall be included in
12 : all copies or substantial portions of the Software.
13 :
14 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 : THE SOFTWARE.
21 : **/
22 :
23 : #include "XLSnnVkInputLayer.h"
24 : #include "XLSnnVkShaders.h"
25 : #include "XLVkPipeline.h"
26 : #include "XLVkObject.h"
27 : #include "XLCoreAttachment.h"
28 : #include "XLCoreFrameQueue.h"
29 : #include "XLCoreFrameRequest.h"
30 :
31 : namespace stappler::xenolith::vk::shadernn {
32 :
33 0 : InputLayer:: ~InputLayer() { }
34 :
35 0 : bool InputLayer::init(Queue::Builder &queueBuilder, QueuePassBuilder &builder, const AttachmentData *input, const AttachmentData *output) {
36 : using namespace core;
37 :
38 0 : auto dataBuffer = queueBuilder.addAttachemnt("InputLayerBuffer", [] (AttachmentBuilder &builder) -> Rc<core::Attachment> {
39 0 : builder.defineAsInput();
40 0 : auto a = Rc<GenericAttachment>::create(builder);
41 0 : a->setValidateInputCallback([] (const Attachment &, const Rc<AttachmentInputData> &data) {
42 0 : return dynamic_cast<InputDataInput *>(data.get()) != nullptr;
43 : });
44 0 : a->setFrameHandleCallback([] (Attachment &a, const FrameQueue &q) {
45 0 : auto h = Rc<AttachmentHandle>::create(a, q);
46 0 : h->setInputCallback([] (AttachmentHandle &handle, FrameQueue &queue, AttachmentInputData *input, Function<void(bool)> &&cb) {
47 0 : cb(true);
48 0 : });
49 0 : return h;
50 0 : });
51 0 : return a;
52 0 : });
53 :
54 0 : auto passInput = builder.addAttachment(input, [] (AttachmentPassBuilder &builder) {
55 0 : builder.setDependency(AttachmentDependencyInfo{
56 : PipelineStage::Transfer, AccessType::TransferWrite,
57 : PipelineStage::ComputeShader, AccessType::ShaderRead,
58 : FrameRenderPassState::Submitted,
59 : });
60 0 : builder.setInitialLayout(AttachmentLayout::TransferDstOptimal);
61 0 : builder.setFinalLayout(AttachmentLayout::General);
62 0 : });
63 :
64 0 : auto passOutput = builder.addAttachment(output, [] (AttachmentPassBuilder &builder) {
65 0 : builder.setDependency(AttachmentDependencyInfo{
66 : PipelineStage::ComputeShader, AccessType::ShaderWrite,
67 : PipelineStage::ComputeShader, AccessType::ShaderWrite,
68 : FrameRenderPassState::Submitted,
69 : });
70 0 : builder.setInitialLayout(AttachmentLayout::General);
71 0 : builder.setFinalLayout(AttachmentLayout::General);
72 0 : });
73 :
74 0 : builder.addAttachment(dataBuffer);
75 :
76 0 : auto layout = builder.addDescriptorLayout([&] (PipelineLayoutBuilder &layoutBuilder) {
77 0 : layoutBuilder.addSet([&] (DescriptorSetBuilder &setBuilder) {
78 0 : setBuilder.addDescriptor(passInput, DescriptorType::StorageImage, AttachmentLayout::General);
79 0 : setBuilder.addDescriptor(passOutput, DescriptorType::StorageImage, AttachmentLayout::General);
80 0 : });
81 0 : });
82 :
83 0 : auto precision = getAttachmentPrecision(output);
84 :
85 0 : builder.addSubpass([&] (SubpassBuilder &subpassBuilder) {
86 0 : subpassBuilder.addComputePipeline("InputLayerPipeline", layout,
87 0 : queueBuilder.addProgramByRef("InputLayerProgram", getShader(LayerShader::Norm, precision)));
88 0 : });
89 :
90 0 : _inputAttachment = input;
91 0 : _outputAttachment = output;
92 0 : _dataAttachment = dataBuffer;
93 :
94 0 : _frameHandleCallback = [] (core::QueuePass &pass, const FrameQueue &q) {
95 0 : return Rc<LayerHandle>::create(pass, q);
96 0 : };
97 :
98 0 : return QueuePass::init(builder);
99 : }
100 :
101 0 : bool InputLayer::LayerHandle::prepare(FrameQueue &q, Function<void(bool)> &&cb) {
102 0 : auto pass = (InputLayer *)_queuePass.get();
103 :
104 0 : if (auto imageAttachment = q.getAttachment(pass->getInputAttachment())) {
105 0 : _inputImage = (const vk::ImageAttachmentHandle *)imageAttachment->handle.get();
106 : }
107 :
108 0 : if (auto imageAttachment = q.getAttachment(pass->getOutputAttachment())) {
109 0 : _outputImage = (const vk::ImageAttachmentHandle *)imageAttachment->handle.get();
110 : }
111 :
112 0 : if (auto bufferAttachment = q.getAttachment(pass->getDataAttachment())) {
113 0 : _dataHandle = (const core::AttachmentHandle *)bufferAttachment->handle.get();
114 : }
115 :
116 0 : return vk::QueuePassHandle::prepare(q, move(cb));
117 : }
118 :
119 0 : void InputLayer::LayerHandle::doTransferInput(vk::CommandBuffer &buf, DeviceFrameHandle &handle, InputDataInput *input) {
120 0 : auto &pool = handle.getMemPool(nullptr);
121 :
122 : // spawn image from frame memory pool
123 0 : auto image = static_cast<Image *>(_inputImage->getQueueData()->image->getImage().get());
124 :
125 : // alloc staging buffer
126 0 : auto staging = pool->spawn(AllocationUsage::DeviceLocalHostVisible, BufferInfo(core::BufferUsage::TransferSrc,
127 0 : size_t(image->getMemory()->getInfo().size)));
128 :
129 0 : staging->map([&] (uint8_t *buf, VkDeviceSize size) {
130 0 : input->image.writeData(buf, size);
131 0 : });
132 :
133 0 : buf.cmdCopyBufferToImage(staging, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0);
134 :
135 : ImageMemoryBarrier outImageBarrier(image, VkAccessFlags(core::AccessType::TransferWrite), VkAccessFlags(core::AccessType::ShaderRead),
136 0 : VkImageLayout(core::AttachmentLayout::TransferDstOptimal), VkImageLayout(core::AttachmentLayout::General));
137 0 : buf.cmdPipelineBarrier(VkPipelineStageFlags(core::PipelineStage::Transfer), VkPipelineStageFlags(core::PipelineStage::ComputeShader),
138 0 : 0, makeSpanView(&outImageBarrier, 1));
139 0 : }
140 :
141 0 : Vector<const vk::CommandBuffer *> InputLayer::LayerHandle::doPrepareCommands(FrameHandle &handle) {
142 0 : auto buf = _pool->recordBuffer(*_device, [&] (vk::CommandBuffer &buf) {
143 0 : auto pass = _data->impl.cast<vk::RenderPass>().get();
144 0 : pass->perform(*this, buf, [&] {
145 0 : auto data = static_cast<InputDataInput *>(_dataHandle->getInput());
146 0 : auto extent = _outputImage->getQueueData()->image->getImage()->getInfo().extent;
147 :
148 0 : doTransferInput(buf, static_cast<DeviceFrameHandle &>(handle), data);
149 :
150 0 : buf.cmdBindDescriptorSets(pass, 0);
151 0 : buf.cmdPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, BytesView(reinterpret_cast<uint8_t *>(&data->norm), sizeof(NormData)));
152 :
153 0 : auto pipeline = static_cast<vk::ComputePipeline *>((*_data->subpasses[0]->computePipelines.begin())->pipeline.get());
154 :
155 0 : buf.cmdBindPipeline(pipeline);
156 0 : buf.cmdDispatch((extent.width - 1) / pipeline->getLocalX() + 1,
157 0 : (extent.height - 1) / pipeline->getLocalY() + 1,
158 0 : (extent.depth - 1) / pipeline->getLocalZ() + 1);
159 0 : }, true);
160 0 : return true;
161 : });
162 0 : return Vector<const vk::CommandBuffer *>{buf};
163 : }
164 :
165 4 : InputBufferLayer::~InputBufferLayer() { }
166 :
167 2 : bool InputBufferLayer::init(Queue::Builder &queueBuilder, QueuePassBuilder &builder, Front *front, const AttachmentData *input, const AttachmentData *output) {
168 : using namespace core;
169 :
170 4 : auto dataBuffer = queueBuilder.addAttachemnt(toString(front->getName(), "_buffer"), [] (AttachmentBuilder &builder) -> Rc<core::Attachment> {
171 2 : builder.defineAsInput();
172 2 : auto a = Rc<BufferAttachment>::create(builder, BufferInfo(PassType::Compute, BufferUsage::StorageBuffer, BufferUsage::TransferDst));
173 2 : a->setValidateInputCallback([] (const Attachment &, const Rc<AttachmentInputData> &data) {
174 2400 : return dynamic_cast<InputBufferDataInput *>(data.get()) != nullptr;
175 : });
176 2 : a->setFrameHandleCallback([] (Attachment &a, const FrameQueue &q) {
177 2400 : auto h = Rc<BufferAttachmentHandle>::create(a, q);
178 2400 : h->setInputCallback([] (AttachmentHandle &handle, FrameQueue &queue, AttachmentInputData *input, Function<void(bool)> &&cb) {
179 2400 : cb(true);
180 2400 : });
181 2400 : return h;
182 0 : });
183 4 : return a;
184 2 : });
185 :
186 2 : builder.addAttachment(input);
187 2 : builder.addAttachment(output);
188 2 : auto passBuffers = builder.addAttachment(dataBuffer);
189 :
190 2 : auto layout = builder.addDescriptorLayout([&] (PipelineLayoutBuilder &layoutBuilder) {
191 2 : layoutBuilder.addSet([&] (DescriptorSetBuilder &setBuilder) {
192 4 : setBuilder.addDescriptorArray(passBuffers, 2, DescriptorType::StorageBuffer);
193 2 : });
194 4 : });
195 :
196 2 : builder.addSubpass([&] (SubpassBuilder &subpassBuilder) {
197 2 : subpassBuilder.addComputePipeline(toString(front->getName(), "_pipeline"), layout,
198 6 : SpecializationInfo(
199 4 : queueBuilder.addProgram(toString(front->getName(), "_program"),getShader(LayerShader::BufferNorm, Precision::Unknown)),
200 4 : Vector<SpecializationConstant>{
201 : SpecializationConstant(2), // nbuffers
202 : SpecializationConstant(0), // output
203 : SpecializationConstant(1) // input
204 : }));
205 2 : });
206 :
207 2 : _inputAttachment = input;
208 2 : _outputAttachment = output;
209 2 : _dataAttachment = dataBuffer;
210 2 : _front = front;
211 :
212 2400 : _frameHandleCallback = [] (core::QueuePass &pass, const FrameQueue &q) {
213 2400 : return Rc<LayerHandle>::create(pass, q);
214 2 : };
215 :
216 4 : return QueuePass::init(builder);
217 : }
218 :
219 2400 : bool InputBufferLayer::LayerHandle::prepare(FrameQueue &q, Function<void(bool)> &&cb) {
220 2400 : auto pass = (InputBufferLayer *)_queuePass.get();
221 :
222 2400 : if (auto inputAttachment = q.getAttachment(pass->getInputAttachment())) {
223 2400 : _inputBuffer = (vk::BufferAttachmentHandle *)inputAttachment->handle.get();
224 : }
225 :
226 2400 : if (auto outputAttachment = q.getAttachment(pass->getOutputAttachment())) {
227 2400 : _outputBuffer = (vk::BufferAttachmentHandle *)outputAttachment->handle.get();
228 : }
229 :
230 2400 : if (auto bufferAttachment = q.getAttachment(pass->getDataAttachment())) {
231 2400 : _dataHandle = (vk::BufferAttachmentHandle *)bufferAttachment->handle.get();
232 : }
233 :
234 2400 : _front = pass->getFront();
235 :
236 2400 : bool isOutput = _outputBuffer->isOutput();
237 2400 : auto input = static_cast<InputBufferDataInput *>(_dataHandle->getInput());
238 2400 : auto handle = static_cast<DeviceFrameHandle *>(q.getFrame().get());
239 2400 : auto &pool = handle->getMemPool(nullptr);
240 :
241 2400 : auto bufSize = _front->getBufferSize() * sizeof(float);
242 :
243 : // alloc staging buffer
244 : auto staging = pool->spawn(AllocationUsage::DeviceLocalHostVisible,
245 2400 : BufferInfo(core::BufferUsage::TransferSrc | core::BufferUsage::StorageBuffer,
246 4800 : size_t(bufSize)));
247 2400 : Rc<Buffer> dest;
248 2400 : if (isOutput) {
249 0 : dest = pool->spawnPersistent(AllocationUsage::DeviceLocalHostVisible,
250 0 : BufferInfo(core::BufferUsage::TransferSrc | core::BufferUsage::StorageBuffer,
251 0 : size_t(bufSize)));
252 : } else {
253 7200 : dest = pool->spawn(AllocationUsage::DeviceLocal,
254 4800 : BufferInfo(core::BufferUsage::TransferSrc | core::BufferUsage::StorageBuffer,
255 7200 : size_t(bufSize)));
256 : }
257 :
258 2400 : staging->map([&] (uint8_t *buf, VkDeviceSize size) {
259 2400 : input->buffer.writeData(buf, size);
260 2400 : });
261 :
262 2400 : _inputBuffer->addBufferView(staging);
263 2400 : _outputBuffer->addBufferView(dest);
264 :
265 2400 : _dataHandle->addBufferView(dest); // output
266 2400 : _dataHandle->addBufferView(staging); // input
267 :
268 4800 : return vk::QueuePassHandle::prepare(q, move(cb));
269 2400 : }
270 :
271 2400 : Vector<const vk::CommandBuffer *> InputBufferLayer::LayerHandle::doPrepareCommands(FrameHandle &handle) {
272 2400 : auto buf = _pool->recordBuffer(*_device, [&] (vk::CommandBuffer &buf) {
273 2400 : auto pass = _data->impl.cast<vk::RenderPass>().get();
274 2400 : pass->perform(*this, buf, [&] {
275 : struct NormBufferData {
276 : int32_t size;
277 : float mean;
278 : float norm;
279 : };
280 :
281 2400 : NormBufferData pcb{int32_t(_front->getBufferSize()), _front->getMean(), _front->getNorm()};
282 :
283 2400 : buf.cmdBindDescriptorSets(pass, 0);
284 2400 : buf.cmdPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, BytesView(reinterpret_cast<uint8_t *>(&pcb), sizeof(NormBufferData)));
285 :
286 2400 : auto pipeline = static_cast<vk::ComputePipeline *>((*_data->subpasses[0]->computePipelines.begin())->pipeline.get());
287 :
288 2400 : auto nbatches = (_front->getBufferSize() - 1) / pipeline->getLocalX() + 1;
289 :
290 2400 : buf.cmdBindPipeline(pipeline);
291 2400 : buf.cmdDispatch(nbatches, 1, 1);
292 :
293 2400 : BufferMemoryBarrier barrier(_outputBuffer->getBuffers().front().buffer, VkAccessFlags(core::AccessType::ShaderWrite | core::AccessType::ShaderRead),
294 4800 : VkAccessFlags(core::AccessType::ShaderWrite | core::AccessType::ShaderRead));
295 2400 : buf.cmdPipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, makeSpanView(&barrier, 1));
296 2400 : }, true);
297 2400 : return true;
298 : });
299 2400 : return Vector<const vk::CommandBuffer *>{buf};
300 : }
301 :
302 0 : InputCsvIntLayer::~InputCsvIntLayer() { }
303 :
304 0 : bool InputCsvIntLayer::init(Queue::Builder &queueBuilder, QueuePassBuilder &builder, Front *front,
305 : const AttachmentData *input, const AttachmentData *output) {
306 : using namespace core;
307 :
308 0 : auto normBuffer = queueBuilder.addBufferByRef(toString(front->getName(), "_normBuffer"),
309 0 : BufferInfo(BufferUsage::StorageBuffer, BufferPersistent(true), PassType::Compute),
310 0 : front->getNormDataBuffer(), nullptr, AccessType::ShaderRead);
311 :
312 0 : auto dataBuffer = queueBuilder.addAttachemnt("InputCsvIntLayerBuffer", [&] (AttachmentBuilder &builder) -> Rc<core::Attachment> {
313 0 : builder.defineAsInput();
314 0 : auto a = Rc<BufferAttachment>::create(builder, normBuffer);
315 0 : a->setValidateInputCallback([] (const Attachment &, const Rc<AttachmentInputData> &data) {
316 0 : return dynamic_cast<InputCsvInput *>(data.get()) != nullptr;
317 : });
318 0 : a->setFrameHandleCallback([] (Attachment &a, const FrameQueue &q) {
319 0 : auto h = Rc<BufferAttachmentHandle>::create(a, q);
320 0 : h->setInputCallback([] (AttachmentHandle &handle, FrameQueue &queue, AttachmentInputData *input, Function<void(bool)> &&cb) {
321 0 : cb(true);
322 0 : });
323 0 : return h;
324 0 : });
325 0 : return a;
326 0 : });
327 :
328 0 : auto passInput = builder.addAttachment(input);
329 0 : auto passOutput = builder.addAttachment(output);
330 0 : auto passData = builder.addAttachment(dataBuffer);
331 :
332 0 : auto layout = builder.addDescriptorLayout([&] (PipelineLayoutBuilder &layoutBuilder) {
333 0 : layoutBuilder.addSet([&] (DescriptorSetBuilder &setBuilder) {
334 0 : setBuilder.addDescriptor(passOutput, DescriptorType::StorageBuffer);
335 0 : setBuilder.addDescriptor(passInput, DescriptorType::StorageBuffer);
336 0 : setBuilder.addDescriptor(passData, DescriptorType::StorageBuffer);
337 0 : });
338 0 : });
339 :
340 0 : builder.addSubpass([&] (SubpassBuilder &subpassBuilder) {
341 0 : subpassBuilder.addComputePipeline("InputCsvIntPipeline", layout,
342 0 : queueBuilder.addProgramByRef("InputCsvIntPProgram", getShader(LayerShader::StatNorm, Precision::Unknown)));
343 0 : });
344 :
345 0 : _inputAttachment = input;
346 0 : _outputAttachment = output;
347 0 : _dataAttachment = dataBuffer;
348 0 : _front = front;
349 :
350 0 : _frameHandleCallback = [] (core::QueuePass &pass, const FrameQueue &q) {
351 0 : return Rc<LayerHandle>::create(pass, q);
352 0 : };
353 :
354 0 : return QueuePass::init(builder);
355 : }
356 :
357 0 : bool InputCsvIntLayer::LayerHandle::prepare(FrameQueue &q, Function<void(bool)> &&cb) {
358 0 : auto pass = (InputCsvIntLayer *)_queuePass.get();
359 :
360 0 : if (auto inputAttachment = q.getAttachment(pass->getInputAttachment())) {
361 0 : _inputBuffer = (vk::BufferAttachmentHandle *)inputAttachment->handle.get();
362 : }
363 :
364 0 : if (auto outputAttachment = q.getAttachment(pass->getOutputAttachment())) {
365 0 : _outputBuffer = (vk::BufferAttachmentHandle *)outputAttachment->handle.get();
366 : }
367 :
368 0 : if (auto bufferAttachment = q.getAttachment(pass->getDataAttachment())) {
369 0 : _dataHandle = (vk::BufferAttachmentHandle *)bufferAttachment->handle.get();
370 : }
371 :
372 0 : _front = pass->getFront();
373 :
374 0 : auto input = static_cast<InputCsvInput *>(_dataHandle->getInput());
375 0 : auto handle = static_cast<DeviceFrameHandle *>(q.getFrame().get());
376 0 : auto &pool = handle->getMemPool(nullptr);
377 :
378 0 : auto bufferSize = sizeof(uint64_t) * _front->getFields().size() * input->data.size();
379 :
380 : // alloc staging buffer
381 : auto staging = pool->spawn(AllocationUsage::DeviceLocalHostVisible,
382 0 : BufferInfo(core::BufferUsage::TransferSrc | core::BufferUsage::StorageBuffer,
383 0 : size_t(bufferSize)));
384 : auto dest = pool->spawn(AllocationUsage::DeviceLocal,
385 0 : BufferInfo(core::BufferUsage::TransferSrc | core::BufferUsage::StorageBuffer,
386 0 : size_t(bufferSize)));
387 :
388 0 : staging->map([&] (uint8_t *buf, VkDeviceSize size) {
389 0 : auto target = (uint64_t *)buf;
390 :
391 0 : for (auto &it : input->data) {
392 0 : for (auto &f : _front->getFields()) {
393 0 : *target = it.getInteger(f);
394 0 : ++ target;
395 : }
396 : }
397 0 : });
398 :
399 0 : _inputBuffer->addBufferView(staging);
400 0 : _outputBuffer->addBufferView(dest);
401 :
402 0 : return vk::QueuePassHandle::prepare(q, move(cb));
403 0 : }
404 :
405 0 : Vector<const vk::CommandBuffer *> InputCsvIntLayer::LayerHandle::doPrepareCommands(FrameHandle &handle) {
406 0 : auto buf = _pool->recordBuffer(*_device, [&] (vk::CommandBuffer &buf) {
407 0 : auto pass = _data->impl.cast<vk::RenderPass>().get();
408 0 : pass->perform(*this, buf, [&] {
409 : struct InputInfo {
410 : int size;
411 : int fields;
412 : };
413 :
414 0 : auto input = static_cast<InputCsvInput *>(_dataHandle->getInput());
415 0 : InputInfo pcb{int(input->data.size()), int(_front->getFields().size())};
416 :
417 0 : buf.cmdBindDescriptorSets(pass, 0);
418 0 : buf.cmdPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, BytesView(reinterpret_cast<uint8_t *>(&pcb), sizeof(InputInfo)));
419 :
420 0 : auto pipeline = static_cast<vk::ComputePipeline *>((*_data->subpasses[0]->computePipelines.begin())->pipeline.get());
421 :
422 0 : auto nbatches = (pcb.size - 1) / pipeline->getLocalY() + 1;
423 :
424 0 : buf.cmdBindPipeline(pipeline);
425 0 : buf.cmdDispatch(pcb.fields, nbatches, 1);
426 0 : }, true);
427 0 : return true;
428 : });
429 0 : return Vector<const vk::CommandBuffer *>{buf};
430 : }
431 :
432 : }
|