Line data Source code
1 : /**
2 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
3 :
4 : Permission is hereby granted, free of charge, to any person obtaining a copy
5 : of this software and associated documentation files (the "Software"), to deal
6 : in the Software without restriction, including without limitation the rights
7 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 : copies of the Software, and to permit persons to whom the Software is
9 : furnished to do so, subject to the following conditions:
10 :
11 : The above copyright notice and this permission notice shall be included in
12 : all copies or substantial portions of the Software.
13 :
14 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 : THE SOFTWARE.
21 : **/
22 :
23 : #include "XLSnnConvLayer.h"
24 : #include "XLSnnVkConvLayer.h"
25 :
26 : namespace stappler::xenolith::shadernn {
27 :
// Zero-filled fallback buffer sized as one vec4 for the shader. Returned by
// the buffer-data getters below when an optional per-channel array (bias,
// batch-norm parameters) was not loaded, so a valid buffer can always be bound.
static Vector<float> s_InvalidVector{ 0.0f, 0.0f, 0.0f, 0.0f };
30 :
31 0 : Extent3 ConvLayer::getKernelExtent() const {
32 0 : uint32_t unit = 4;
33 0 : uint32_t ic_4 = UP_DIV(_numInputPlanes, unit);
34 0 : uint32_t oc_4 = UP_DIV(_numOutputPlanes, unit);
35 :
36 0 : return Extent3(ic_4 * unit, oc_4, (uint32_t) (_kernelSize * _kernelSize));
37 : }
38 :
39 0 : BytesView ConvLayer::getKernelImageData() const {
40 0 : if (_model->isHalfPrecision()) {
41 0 : return BytesView((const uint8_t *)_weightsDataF16.data(), _weightsDataF16.size() * sizeof(uint16_t));
42 : } else {
43 0 : return BytesView((const uint8_t *)_weightsData.data(), _weightsData.size() * sizeof(float));
44 : }
45 : }
46 :
47 0 : BytesView ConvLayer::getBiasBufferData() const {
48 0 : if (!_biases.empty()) {
49 0 : return BytesView((const uint8_t *)_biases.data(), _biases.size() * sizeof(float));
50 : }
51 0 : return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
52 : }
53 :
54 0 : bool Conv2DLayer::oihw2hwo4i4(const Vector<MatVec>& inputWeights, Vector<float>& outVec, int inChannels, int outChannels, int fw, int fh, int unit) {
55 0 : int alignedWeightSize = ROUND_UP(outChannels, unit) * fw * fh * ROUND_UP(inChannels, unit);
56 :
57 : // SNN_LOGD("inChannels = %d, outChannels = %d, fw = %d, fh = %d, all: %d", inChannels, outChannels, fw, fh, alignedWeightSize);
58 :
59 0 : outVec.clear();
60 0 : outVec.resize(alignedWeightSize);
61 0 : float *out = (float*) outVec.data();
62 0 : int planeSize = ROUND_UP(outChannels, unit) * ROUND_UP(inChannels, unit);
63 0 : memset(out, 0, alignedWeightSize * sizeof(float));
64 0 : for (int b = 0; b < outChannels; ++b) {
65 0 : int b_4 = b / unit;
66 0 : int mx = b % unit;
67 0 : for (int d = 0; d < inChannels; ++d) {
68 0 : for (int y = 0; y < fh; ++y) {
69 0 : for (int x = 0; x < fw; ++x) {
70 0 : int base = (y * fw + x) * planeSize;
71 0 : int inSize = ROUND_UP(inChannels, unit) * unit;
72 0 : out[base + inSize * b_4 + d * unit + mx] = inputWeights[b * inChannels + d].at(y * fw + x);
73 : }
74 : }
75 : }
76 : }
77 0 : return 0;
78 : }
79 :
// Parses a Conv2D layer description from the serialized model `data`:
// activation, kernel/stride geometry, padding, kernel weights (repacked into
// shader layout and optionally to fp16), biases, batch-normalization
// parameters and leaky-ReLU alpha. Weight values come either from the
// model's sequential data file (`readFloatData()` — read order matters) or
// from inline arrays under the "weights" value.
// Returns false only if the base ConvLayer::init fails.
bool Conv2DLayer::init(Model *m, StringView tag, size_t idx, const Value &data) {
	if (!ConvLayer::init(m, tag, idx, data)) {
		return false;
	}

	_activation = getActivationValue(data.getString("activation"));
	_kernelSize = data.getInteger("kernel_size");
	_stride = data.getInteger("strides");

	// Padding is either an explicit array [vertical, horizontal] — where each
	// entry is itself [leading, trailing] or a scalar for symmetric padding —
	// or a string mode (e.g. "valid"/"same", resolved in getPaddingOffset()).
	if (data.isArray("padding")) {
		auto &arr = data.getValue("padding");
		auto &upPadding = arr.getValue(0);
		auto &sidePadding = arr.getValue(1);

		if (upPadding.isArray()) {
			_paddingT = upPadding.getInteger(0);
			_paddingB = upPadding.getInteger(1);
		} else {
			_paddingT = _paddingB = upPadding.getInteger();
		}
		if (sidePadding.isArray()) {
			_paddingL = sidePadding.getInteger(0);
			_paddingR = sidePadding.getInteger(1);
		} else {
			_paddingL = _paddingR = sidePadding.getInteger();
		}

		_paddingMode = data.getString("mode");
	} else if (data.isString("padding")) {
		_paddingValue = data.getString("padding");
	}

	// Serialized booleans are Python-style strings ("True"/"False").
	if (data.hasValue("use_multi_inputs")) {
		_useMultiInputs = data.getString("use_multi_inputs") == "True";
	} else {
		_useMultiInputs = false;
	}

	// Reads one float from an inline array iterator, advancing it; rounds the
	// value through fp16 when the model runs at half precision so CPU-side
	// data matches what the shader will see.
	auto readFloatValue = [&] (Value::ArrayType::const_iterator &arrIt) -> float {
		auto value = static_cast<float>((arrIt++)->getDouble());
		if (_model->isHalfPrecision()) {
			value = convertToMediumPrecision(value);
		}
		return value;
	};

	auto & weightObj = data.getValue("weights");

	// Kernel weights: one kernelSize x kernelSize matrix per (out, in) channel
	// pair, filled in OIHW order from either the data file or inline arrays.
	_weightsCvM = Vector<MatVec>(_numInputPlanes * _numOutputPlanes, MatVec(Extent2(_kernelSize, _kernelSize)));
	auto matrixIt = _weightsCvM.begin();
	if (_model->usesDataFile()) {
		for (uint32_t i = 0; i < _numOutputPlanes; ++i) {
			for (uint32_t j = 0; j < _numInputPlanes; ++j) {
				for (uint32_t writingRow = 0; writingRow < _kernelSize; ++writingRow) {
					for (uint32_t writingCol = 0; writingCol < _kernelSize; ++writingCol) {
						matrixIt->set(writingRow, writingCol, _model->readFloatData());
					}
				}
				matrixIt++;
			}
		}
	} else {
		auto &arr = weightObj.getArray("kernel");
		auto arrIt = arr.begin();
		for (uint32_t i = 0; i < _numOutputPlanes; ++i) {
			for (uint32_t j = 0; j < _numInputPlanes; ++j) {
				for (uint32_t writingRow = 0; writingRow < _kernelSize; ++writingRow) {
					for (uint32_t writingCol = 0; writingCol < _kernelSize; ++writingCol) {
						auto value = readFloatValue(arrIt);
						matrixIt->set(writingRow, writingCol, value);
					}
				}
				matrixIt++;
			}
		}
	}

	// Repack OIHW matrices into the vec4-blocked shader layout (_weightsData).
	oihw2hwo4i4(_weightsCvM, _weightsData, _numInputPlanes, _numOutputPlanes, _kernelSize, _kernelSize);

	// Half-precision models also keep an fp16 copy for the kernel image upload.
	if (_model->isHalfPrecision()) {
		_weightsDataF16.reserve(_weightsData.size());
		for (auto &it : _weightsData) {
			_weightsDataF16.emplace_back(halffloat::encode(it));
		}
	}

	// Biases default to zero; overwritten only when "useBias" is set.
	_biases.resize(_numOutputPlanes, 0.0);
	if (data.getString("useBias") == "True") {
		if (_model->usesDataFile()) {
			for (uint32_t i = 0; i < _numOutputPlanes; i++) {
				_biases[i] = _model->readFloatData();
			}
		} else {
			auto &arr = weightObj.getArray("bias");
			auto arrIt = arr.begin();
			for (uint32_t i = 0; i < _numOutputPlanes; i++) {
				_biases[i] = readFloatValue(arrIt);
			}
		}
	}

	_useBatchNormalization = (data.getString("useBatchNormalization") == "True") ? true : false;
	if (_useBatchNormalization) {
		_batchNormalization.gamma.resize(_numOutputPlanes, 0.0f);
		_batchNormalization.beta.resize(_numOutputPlanes, 0.0f);
		_batchNormalization.mean.resize(_numOutputPlanes, 0.0f);
		_batchNormalization.variance.resize(_numOutputPlanes, 0.0f);

		auto &batchNormObj = data.getValue("batchNormalization");
		if (_model->usesDataFile()) {
			// Data-file order is fixed: gamma, beta, mean, variance.
			for (uint32_t i = 0; i < _numOutputPlanes; i++) {
				_batchNormalization.gamma[i] = _model->readFloatData();
			}
			for (uint32_t i = 0; i < _numOutputPlanes; i++) {
				_batchNormalization.beta[i] = _model->readFloatData();
			}
			for (uint32_t i = 0; i < _numOutputPlanes; i++) {
				_batchNormalization.mean[i] = _model->readFloatData();
			}
			for (uint32_t i = 0; i < _numOutputPlanes; i++) {
				_batchNormalization.variance[i] = _model->readFloatData();
			}
		} else {
			auto betaArray = batchNormObj.getArray("beta").begin();
			auto gammaArray = batchNormObj.getArray("gamma").begin();
			// Accept both snake_case and camelCase key spellings for the moving stats.
			auto movingMean = batchNormObj.getArray(batchNormObj.hasValue("moving_mean") ? "moving_mean" : "movingMean").begin();
			auto movingVariance = batchNormObj.getArray(batchNormObj.hasValue("moving_variance") ? "moving_variance" : "movingVariance").begin();

			for (uint32_t i = 0; i < _numOutputPlanes; i++) {
				_batchNormalization.beta[i] = readFloatValue(betaArray);
				_batchNormalization.gamma[i] = readFloatValue(gammaArray);
				_batchNormalization.mean[i] = readFloatValue(movingMean);
				_batchNormalization.variance[i] = readFloatValue(movingVariance);
			}
		}
	}

	// Leaky-ReLU slope may be stored as "leakyReluAlpha" or plain "alpha".
	if (_activation == Activation::LEAKYRELU) {
		if (data.hasValue("leakyReluAlpha")) {
			_leakyReluAlpha = data.getDouble("leakyReluAlpha");
		} else {
			_leakyReluAlpha = data.getDouble("alpha");
		}
		if (_model->isHalfPrecision()) {
			_leakyReluAlpha = convertToMediumPrecision(_leakyReluAlpha);
		}
	}

	return true;
}
230 :
231 0 : LayerTransformInfo Conv2DLayer::getOutputTransform() const {
232 0 : auto offset = getPaddingOffset();
233 0 : float scale = 1 / static_cast<float>(_stride);
234 0 : float translation = 0.0f;
235 0 : if (_kernelSize % 2 != 0) {
236 0 : translation = 1 + (static_cast<float>(offset.x + offset.y) - static_cast<float>(_kernelSize)) / static_cast<float>(_stride);
237 : } else {
238 0 : translation = 1 + (static_cast<float>(offset.x + offset.y - 1) - static_cast<float>(_kernelSize)) / static_cast<float>(_stride);
239 : }
240 0 : return {0, { {scale, scale, translation, translation}}};
241 : }
242 :
243 0 : BytesView Conv2DLayer::getNormBetaBufferData() const {
244 0 : if (!_batchNormalization.beta.empty()) {
245 0 : return BytesView((const uint8_t *)_batchNormalization.beta.data(), _batchNormalization.beta.size() * sizeof(float));
246 : }
247 0 : return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
248 : }
249 :
250 0 : BytesView Conv2DLayer::getNormGammaBufferData() const {
251 0 : if (!_batchNormalization.gamma.empty()) {
252 0 : return BytesView((const uint8_t *)_batchNormalization.gamma.data(), _batchNormalization.gamma.size() * sizeof(float));
253 : }
254 0 : return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
255 : }
256 :
257 0 : BytesView Conv2DLayer::getNormMeanBufferData() const {
258 0 : if (!_batchNormalization.mean.empty()) {
259 0 : return BytesView((const uint8_t *)_batchNormalization.mean.data(), _batchNormalization.mean.size() * sizeof(float));
260 : }
261 0 : return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
262 : }
263 :
264 0 : BytesView Conv2DLayer::getNormVarianceBufferData() const {
265 0 : if (!_batchNormalization.variance.empty()) {
266 0 : return BytesView((const uint8_t *)_batchNormalization.variance.data(), _batchNormalization.variance.size() * sizeof(float));
267 : }
268 0 : return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
269 : }
270 :
271 0 : UVec4 Conv2DLayer::getPaddingOffset() const {
272 : UVec4 offsets;
273 0 : if (_paddingValue.empty()) {
274 0 : offsets.x = _paddingT;
275 0 : offsets.y = _paddingB;
276 0 : offsets.z = _paddingL;
277 0 : offsets.w = _paddingR;
278 : } else {
279 0 : if (_paddingValue == "valid" || _paddingValue == "none") {
280 0 : offsets.x = 0;
281 0 : offsets.y = 0;
282 0 : offsets.z = 0;
283 0 : offsets.w = 0;
284 : } else {
285 0 : if (_kernelSize > 1) {
286 0 : offsets.x = std::max(static_cast<uint32_t>(_kernelSize / 2), (uint32_t) 1);
287 0 : offsets.y = std::max(static_cast<uint32_t>(_kernelSize / 2), (uint32_t) 1);
288 0 : offsets.z = std::max(static_cast<uint32_t>(_kernelSize / 2), (uint32_t) 1);
289 0 : offsets.w = std::max(static_cast<uint32_t>(_kernelSize / 2), (uint32_t) 1);
290 0 : if (_kernelSize % 2 == 0) {
291 0 : offsets.x = offsets.x - 1;
292 0 : offsets.z = offsets.z - 1;
293 : }
294 : } else {
295 0 : offsets.x = 0;
296 0 : offsets.y = 0;
297 0 : offsets.z = 0;
298 0 : offsets.w = 0;
299 : }
300 : }
301 : }
302 0 : return offsets;
303 : }
304 :
305 0 : const core::QueuePassData *Conv2DLayer::prepare(core::Queue::Builder &builder,
306 : Map<Layer *, const core::AttachmentData *> inputs,
307 : Map<Attachment *, const core::AttachmentData *> attachments) {
308 :
309 0 : auto inputIt = attachments.find(_inputs.front().attachment);
310 0 : auto outputIt = attachments.find(getOutput());
311 :
312 0 : if (inputIt == attachments.end() || outputIt == attachments.end()) {
313 0 : log::error("snn::InputLayer", "No attachments specified");
314 0 : return nullptr;
315 : }
316 :
317 0 : return builder.addPass(getName(), core::PassType::Compute, core::RenderOrdering(_inputIndex),
318 0 : [&] (core::QueuePassBuilder &passBuilder) -> Rc<core::QueuePass> {
319 0 : return Rc<vk::shadernn::Conv2DLayer>::create(builder, passBuilder, this, inputIt->second, outputIt->second);
320 0 : });
321 : }
322 :
323 : }
|