LCOV - code coverage report
Current view: top level - xenolith/utils/shadernn/src/layers - XLSnnConvLayer.cc (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 183 0.0 %
Date: 2024-05-06 04:51:23 Functions: 0 14 0.0 %

          Line data    Source code
       1             : /**
       2             :  Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
       3             : 
       4             :  Permission is hereby granted, free of charge, to any person obtaining a copy
       5             :  of this software and associated documentation files (the "Software"), to deal
       6             :  in the Software without restriction, including without limitation the rights
       7             :  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
       8             :  copies of the Software, and to permit persons to whom the Software is
       9             :  furnished to do so, subject to the following conditions:
      10             : 
      11             :  The above copyright notice and this permission notice shall be included in
      12             :  all copies or substantial portions of the Software.
      13             : 
      14             :  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      15             :  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      16             :  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      17             :  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      18             :  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      19             :  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      20             :  THE SOFTWARE.
      21             :  **/
      22             : 
      23             : #include "XLSnnConvLayer.h"
      24             : #include "XLSnnVkConvLayer.h"
      25             : 
      26             : namespace stappler::xenolith::shadernn {
      27             : 
      28             : // one vec4 for shader
      29             : static Vector<float> s_InvalidVector{ 0.0f, 0.0f, 0.0f, 0.0f };
      30             : 
      31           0 : Extent3 ConvLayer::getKernelExtent() const {
      32           0 :         uint32_t unit = 4;
      33           0 :         uint32_t ic_4 = UP_DIV(_numInputPlanes, unit);
      34           0 :         uint32_t oc_4 = UP_DIV(_numOutputPlanes, unit);
      35             : 
      36           0 :         return Extent3(ic_4 * unit, oc_4, (uint32_t) (_kernelSize * _kernelSize));
      37             : }
      38             : 
      39           0 : BytesView ConvLayer::getKernelImageData() const {
      40           0 :         if (_model->isHalfPrecision()) {
      41           0 :                 return BytesView((const uint8_t *)_weightsDataF16.data(), _weightsDataF16.size() * sizeof(uint16_t));
      42             :         } else {
      43           0 :                 return BytesView((const uint8_t *)_weightsData.data(), _weightsData.size() * sizeof(float));
      44             :         }
      45             : }
      46             : 
      47           0 : BytesView ConvLayer::getBiasBufferData() const {
      48           0 :         if (!_biases.empty()) {
      49           0 :                 return BytesView((const uint8_t *)_biases.data(), _biases.size() * sizeof(float));
      50             :         }
      51           0 :         return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
      52             : }
      53             : 
      54           0 : bool Conv2DLayer::oihw2hwo4i4(const Vector<MatVec>& inputWeights, Vector<float>& outVec, int inChannels, int outChannels, int fw, int fh, int unit) {
      55           0 :         int alignedWeightSize = ROUND_UP(outChannels, unit) * fw * fh * ROUND_UP(inChannels, unit);
      56             : 
      57             :         // SNN_LOGD("inChannels = %d, outChannels = %d, fw = %d, fh = %d, all: %d", inChannels, outChannels, fw, fh, alignedWeightSize);
      58             : 
      59           0 :         outVec.clear();
      60           0 :         outVec.resize(alignedWeightSize);
      61           0 :         float *out = (float*) outVec.data();
      62           0 :         int planeSize = ROUND_UP(outChannels, unit) * ROUND_UP(inChannels, unit);
      63           0 :         memset(out, 0, alignedWeightSize * sizeof(float));
      64           0 :         for (int b = 0; b < outChannels; ++b) {
      65           0 :                 int b_4 = b / unit;
      66           0 :                 int mx = b % unit;
      67           0 :                 for (int d = 0; d < inChannels; ++d) {
      68           0 :                         for (int y = 0; y < fh; ++y) {
      69           0 :                                 for (int x = 0; x < fw; ++x) {
      70           0 :                                         int base = (y * fw + x) * planeSize;
      71           0 :                                         int inSize = ROUND_UP(inChannels, unit) * unit;
      72           0 :                                         out[base + inSize * b_4 + d * unit + mx] = inputWeights[b * inChannels + d].at(y * fw + x);
      73             :                                 }
      74             :                         }
      75             :                 }
      76             :         }
      77           0 :         return 0;
      78             : }
      79             : 
      80           0 : bool Conv2DLayer::init(Model *m, StringView tag, size_t idx, const Value &data) {
      81           0 :         if (!ConvLayer::init(m, tag, idx, data)) {
      82           0 :                 return false;
      83             :         }
      84             : 
      85           0 :         _activation = getActivationValue(data.getString("activation"));
      86           0 :     _kernelSize = data.getInteger("kernel_size");
      87           0 :     _stride = data.getInteger("strides");
      88             : 
      89           0 :         if (data.isArray("padding")) {
      90           0 :                 auto &arr = data.getValue("padding");
      91           0 :         auto &upPadding = arr.getValue(0);
      92           0 :         auto &sidePadding = arr.getValue(1);
      93             : 
      94           0 :         if (upPadding.isArray()) {
      95           0 :                 _paddingT = upPadding.getInteger(0);
      96           0 :                 _paddingB = upPadding.getInteger(1);
      97             :         } else {
      98           0 :                 _paddingT = _paddingB = upPadding.getInteger();
      99             :         }
     100           0 :         if (sidePadding.isArray()) {
     101           0 :                 _paddingL = sidePadding.getInteger(0);
     102           0 :                 _paddingR = sidePadding.getInteger(1);
     103             :         } else {
     104           0 :                 _paddingL = _paddingR = sidePadding.getInteger();
     105             :         }
     106             : 
     107           0 :                 _paddingMode = data.getString("mode");
     108           0 :         } else if (data.isString("padding")) {
     109           0 :                 _paddingValue = data.getString("padding");
     110             :         }
     111             : 
     112           0 :         if (data.hasValue("use_multi_inputs")) {
     113           0 :                 _useMultiInputs = data.getString("use_multi_inputs") == "True";
     114             :         } else {
     115           0 :                 _useMultiInputs = false;
     116             :         }
     117             : 
     118           0 :         auto readFloatValue = [&] (Value::ArrayType::const_iterator &arrIt) -> float {
     119           0 :                 auto value = static_cast<float>((arrIt++)->getDouble());
     120           0 :                 if (_model->isHalfPrecision()) {
     121           0 :                         value = convertToMediumPrecision(value);
     122             :                 }
     123           0 :                 return value;
     124           0 :         };
     125             : 
     126           0 :     auto & weightObj = data.getValue("weights");
     127             : 
     128           0 :     _weightsCvM = Vector<MatVec>(_numInputPlanes * _numOutputPlanes, MatVec(Extent2(_kernelSize, _kernelSize)));
     129           0 :     auto matrixIt = _weightsCvM.begin();
     130           0 :         if (_model->usesDataFile()) {
     131           0 :                 for (uint32_t i = 0; i < _numOutputPlanes; ++i) {
     132           0 :                         for (uint32_t j = 0; j < _numInputPlanes; ++j) {
     133           0 :                                 for (uint32_t writingRow = 0; writingRow < _kernelSize; ++writingRow) {
     134           0 :                                         for (uint32_t writingCol = 0; writingCol < _kernelSize; ++writingCol) {
     135           0 :                                                 matrixIt->set(writingRow, writingCol, _model->readFloatData());
     136             :                                         }
     137             :                                 }
     138           0 :                                 matrixIt++;
     139             :                         }
     140             :                 }
     141             :         } else {
     142           0 :                 auto &arr = weightObj.getArray("kernel");
     143           0 :                 auto arrIt = arr.begin();
     144           0 :                 for (uint32_t i = 0; i < _numOutputPlanes; ++i) {
     145           0 :                         for (uint32_t j = 0; j < _numInputPlanes; ++j) {
     146           0 :                                 for (uint32_t writingRow = 0; writingRow < _kernelSize; ++writingRow) {
     147           0 :                                         for (uint32_t writingCol = 0; writingCol < _kernelSize; ++writingCol) {
     148           0 :                                                 auto value = readFloatValue(arrIt);
     149           0 :                                                 matrixIt->set(writingRow, writingCol, value);
     150             :                                         }
     151             :                                 }
     152           0 :                                 matrixIt++;
     153             :                         }
     154             :                 }
     155             :         }
     156             : 
     157           0 :         oihw2hwo4i4(_weightsCvM, _weightsData, _numInputPlanes, _numOutputPlanes, _kernelSize, _kernelSize);
     158             : 
     159           0 :         if (_model->isHalfPrecision()) {
     160           0 :                 _weightsDataF16.reserve(_weightsData.size());
     161           0 :                 for (auto &it : _weightsData) {
     162           0 :                         _weightsDataF16.emplace_back(halffloat::encode(it));
     163             :                 }
     164             :         }
     165             : 
     166           0 :         _biases.resize(_numOutputPlanes, 0.0);
     167           0 :         if (data.getString("useBias") == "True") {
     168           0 :                 if (_model->usesDataFile()) {
     169           0 :                         for (uint32_t i = 0; i < _numOutputPlanes; i++) {
     170           0 :                                 _biases[i] = _model->readFloatData();
     171             :                         }
     172             :                 } else {
     173           0 :                         auto &arr = weightObj.getArray("bias");
     174           0 :                         auto arrIt = arr.begin();
     175           0 :                         for (uint32_t i = 0; i < _numOutputPlanes; i++) {
     176           0 :                                 _biases[i] = readFloatValue(arrIt);
     177             :                         }
     178             :                 }
     179             :         }
     180             : 
     181           0 :     _useBatchNormalization = (data.getString("useBatchNormalization") == "True") ? true : false;
     182           0 :         if (_useBatchNormalization) {
     183           0 :                 _batchNormalization.gamma.resize(_numOutputPlanes, 0.0f);
     184           0 :                 _batchNormalization.beta.resize(_numOutputPlanes, 0.0f);
     185           0 :                 _batchNormalization.mean.resize(_numOutputPlanes, 0.0f);
     186           0 :                 _batchNormalization.variance.resize(_numOutputPlanes, 0.0f);
     187             : 
     188           0 :                 auto &batchNormObj = data.getValue("batchNormalization");
     189           0 :                 if (_model->usesDataFile()) {
     190           0 :                         for (uint32_t i = 0; i < _numOutputPlanes; i++) {
     191           0 :                                 _batchNormalization.gamma[i] = _model->readFloatData();
     192             :                         }
     193           0 :                         for (uint32_t i = 0; i < _numOutputPlanes; i++) {
     194           0 :                                 _batchNormalization.beta[i] = _model->readFloatData();
     195             :                         }
     196           0 :                         for (uint32_t i = 0; i < _numOutputPlanes; i++) {
     197           0 :                                 _batchNormalization.mean[i] = _model->readFloatData();
     198             :                         }
     199           0 :                         for (uint32_t i = 0; i < _numOutputPlanes; i++) {
     200           0 :                                 _batchNormalization.variance[i] = _model->readFloatData();
     201             :                         }
     202             :                 } else {
     203           0 :                         auto betaArray = batchNormObj.getArray("beta").begin();
     204           0 :                         auto gammaArray = batchNormObj.getArray("gamma").begin();
     205           0 :                         auto movingMean = batchNormObj.getArray(batchNormObj.hasValue("moving_mean") ? "moving_mean" : "movingMean").begin();
     206           0 :                         auto movingVariance = batchNormObj.getArray(batchNormObj.hasValue("moving_variance") ? "moving_variance" : "movingVariance").begin();
     207             : 
     208           0 :                         for (uint32_t i = 0; i < _numOutputPlanes; i++) {
     209           0 :                                 _batchNormalization.beta[i] = readFloatValue(betaArray);
     210           0 :                                 _batchNormalization.gamma[i] = readFloatValue(gammaArray);
     211           0 :                                 _batchNormalization.mean[i] = readFloatValue(movingMean);
     212           0 :                                 _batchNormalization.variance[i] = readFloatValue(movingVariance);
     213             :                         }
     214             :                 }
     215             :         }
     216             : 
     217           0 :         if (_activation == Activation::LEAKYRELU) {
     218           0 :                 if (data.hasValue("leakyReluAlpha")) {
     219           0 :                         _leakyReluAlpha = data.getDouble("leakyReluAlpha");
     220             :                 } else {
     221           0 :                         _leakyReluAlpha = data.getDouble("alpha");
     222             :                 }
     223           0 :                 if (_model->isHalfPrecision()) {
     224           0 :                         _leakyReluAlpha = convertToMediumPrecision(_leakyReluAlpha);
     225             :                 }
     226             :         }
     227             : 
     228           0 :         return true;
     229             : }
     230             : 
     231           0 : LayerTransformInfo Conv2DLayer::getOutputTransform() const {
     232           0 :         auto offset = getPaddingOffset();
     233           0 :         float scale = 1 / static_cast<float>(_stride);
     234           0 :         float translation = 0.0f;
     235           0 :         if (_kernelSize % 2 != 0) {
     236           0 :                 translation = 1 + (static_cast<float>(offset.x + offset.y) - static_cast<float>(_kernelSize)) / static_cast<float>(_stride);
     237             :         } else {
     238           0 :                 translation = 1 + (static_cast<float>(offset.x + offset.y - 1) - static_cast<float>(_kernelSize)) / static_cast<float>(_stride);
     239             :         }
     240           0 :         return {0, { {scale, scale, translation, translation}}};
     241             : }
     242             : 
     243           0 : BytesView Conv2DLayer::getNormBetaBufferData() const {
     244           0 :         if (!_batchNormalization.beta.empty()) {
     245           0 :                 return BytesView((const uint8_t *)_batchNormalization.beta.data(), _batchNormalization.beta.size() * sizeof(float));
     246             :         }
     247           0 :         return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
     248             : }
     249             : 
     250           0 : BytesView Conv2DLayer::getNormGammaBufferData() const {
     251           0 :         if (!_batchNormalization.gamma.empty()) {
     252           0 :                 return BytesView((const uint8_t *)_batchNormalization.gamma.data(), _batchNormalization.gamma.size() * sizeof(float));
     253             :         }
     254           0 :         return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
     255             : }
     256             : 
     257           0 : BytesView Conv2DLayer::getNormMeanBufferData() const {
     258           0 :         if (!_batchNormalization.mean.empty()) {
     259           0 :                 return BytesView((const uint8_t *)_batchNormalization.mean.data(), _batchNormalization.mean.size() * sizeof(float));
     260             :         }
     261           0 :         return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
     262             : }
     263             : 
     264           0 : BytesView Conv2DLayer::getNormVarianceBufferData() const {
     265           0 :         if (!_batchNormalization.variance.empty()) {
     266           0 :                 return BytesView((const uint8_t *)_batchNormalization.variance.data(), _batchNormalization.variance.size() * sizeof(float));
     267             :         }
     268           0 :         return BytesView((const uint8_t *)s_InvalidVector.data(), s_InvalidVector.size() * sizeof(float));
     269             : }
     270             : 
     271           0 : UVec4 Conv2DLayer::getPaddingOffset() const {
     272             :         UVec4 offsets;
     273           0 :         if (_paddingValue.empty()) {
     274           0 :                 offsets.x = _paddingT;
     275           0 :                 offsets.y = _paddingB;
     276           0 :                 offsets.z = _paddingL;
     277           0 :                 offsets.w = _paddingR;
     278             :         } else {
     279           0 :                 if (_paddingValue == "valid" || _paddingValue == "none") {
     280           0 :                         offsets.x = 0;
     281           0 :                         offsets.y = 0;
     282           0 :                         offsets.z = 0;
     283           0 :                         offsets.w = 0;
     284             :                 } else {
     285           0 :                         if (_kernelSize > 1) {
     286           0 :                                 offsets.x = std::max(static_cast<uint32_t>(_kernelSize / 2), (uint32_t) 1);
     287           0 :                                 offsets.y = std::max(static_cast<uint32_t>(_kernelSize / 2), (uint32_t) 1);
     288           0 :                                 offsets.z = std::max(static_cast<uint32_t>(_kernelSize / 2), (uint32_t) 1);
     289           0 :                                 offsets.w = std::max(static_cast<uint32_t>(_kernelSize / 2), (uint32_t) 1);
     290           0 :                                 if (_kernelSize % 2 == 0) {
     291           0 :                                         offsets.x = offsets.x - 1;
     292           0 :                                         offsets.z = offsets.z - 1;
     293             :                                 }
     294             :                         } else {
     295           0 :                                 offsets.x = 0;
     296           0 :                                 offsets.y = 0;
     297           0 :                                 offsets.z = 0;
     298           0 :                                 offsets.w = 0;
     299             :                         }
     300             :                 }
     301             :         }
     302           0 :         return offsets;
     303             : }
     304             : 
     305           0 : const core::QueuePassData *Conv2DLayer::prepare(core::Queue::Builder &builder,
     306             :                 Map<Layer *, const core::AttachmentData *> inputs,
     307             :                 Map<Attachment *, const core::AttachmentData *> attachments) {
     308             : 
     309           0 :         auto inputIt = attachments.find(_inputs.front().attachment);
     310           0 :         auto outputIt = attachments.find(getOutput());
     311             : 
     312           0 :         if (inputIt == attachments.end() || outputIt == attachments.end()) {
     313           0 :                 log::error("snn::InputLayer", "No attachments specified");
     314           0 :                 return nullptr;
     315             :         }
     316             : 
     317           0 :         return builder.addPass(getName(), core::PassType::Compute, core::RenderOrdering(_inputIndex),
     318           0 :                         [&] (core::QueuePassBuilder &passBuilder) -> Rc<core::QueuePass> {
     319           0 :                 return Rc<vk::shadernn::Conv2DLayer>::create(builder, passBuilder, this, inputIt->second, outputIt->second);
     320           0 :         });
     321             : }
     322             : 
     323             : }

Generated by: LCOV version 1.14