Commit 975dcde
Rename status codes (修改状态码命名)
shenshen.fu committed Mar 10, 2024
1 parent aeec18e commit 975dcde
Showing 17 changed files with 69 additions and 33 deletions.
41 changes: 41 additions & 0 deletions bench/bench_rmsnorm.cpp
@@ -0,0 +1,41 @@
//
// Created by fss on 24-2-15.
//
#include <benchmark/benchmark.h>
#include <cstring>
#include <ctime>
#include <memory>
#include "../demos/llama2/llama_chat.hpp"
#include "../source/layer/details/rms_norm.hpp"
#include "data/tensor.hpp"

static void BM_RMSNorm(benchmark::State& state) {
  using namespace kuiper_infer;
  int input_size = state.range(0);

  std::vector<std::shared_ptr<Tensor<float>>> input_tensors;
  std::vector<std::shared_ptr<Tensor<float>>> output_tensors;

  std::shared_ptr<Tensor<float>> input_tensor = std::make_shared<Tensor<float>>(input_size);
  input_tensor->RandN();
  input_tensors.push_back(input_tensor);

  std::vector<sftensor> weight_tensors;
  weight_tensors.push_back(input_tensor);

  std::vector<std::shared_ptr<Tensor<float>>> outputs(input_size);

  std::shared_ptr<Tensor<float>> output_tensor = std::make_shared<Tensor<float>>(input_size);
  output_tensors.push_back(output_tensor);

  RMSNormLayer rms;
  rms.set_weights(weight_tensors);
  for (auto _ : state) {
    rms.Forward(input_tensors, output_tensors);
  }
}

BENCHMARK(BM_RMSNorm)->Unit(benchmark::kMillisecond)->Arg(128)->Iterations(3);
BENCHMARK(BM_RMSNorm)->Unit(benchmark::kMillisecond)->Arg(512)->Iterations(3);
BENCHMARK(BM_RMSNorm)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(3);
BENCHMARK(BM_RMSNorm)->Unit(benchmark::kMillisecond)->Arg(4096)->Iterations(3);
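As context, not part of the commit: the BENCHMARK(...) calls above only register the benchmark; the binary still needs a Google Benchmark entry point. If the bench target does not already link one in elsewhere, the stock macro below is enough, and the resulting binary then accepts the usual flags such as --benchmark_filter=BM_RMSNorm.

// Minimal sketch of a Google Benchmark entry point (assumes no other
// main() is linked into the bench target).
#include <benchmark/benchmark.h>

// Expands to a main() that parses the --benchmark_* flags and runs
// every registered benchmark, including BM_RMSNorm above.
BENCHMARK_MAIN();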
2 changes: 1 addition & 1 deletion bench/bench_unet.cpp
@@ -44,4 +44,4 @@ static void BM_Unet_Batch1_512x512(benchmark::State& state) {
  }
}

-BENCHMARK(BM_Unet_Batch1_512x512)->Unit(benchmark::kMillisecond)->Iterations(kIterationNum);
+//BENCHMARK(BM_Unet_Batch1_512x512)->Unit(benchmark::kMillisecond)->Iterations(kIterationNum);
2 changes: 1 addition & 1 deletion include/status_code.hpp
@@ -44,7 +44,7 @@ enum class StatusCode {
  kInferInputsEmpty = 1,
  kInferOutputsEmpty = 2,
  kInferParameterError = 3,
-  kInferInOutShapeMismatch = 4,
+  kInferDimMismatch = 4,

  kFunctionNotImplement = 5,
  kParseWeightError = 6,
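Because only the enumerator's name changes (its value stays 4), the rest of the commit is mechanical: each layer's Forward() swaps the old name for the new one at its size check. A minimal sketch of that calling pattern, with a hypothetical layer object and using only names visible in this diff:

// Sketch, not from the commit: how callers consume the renamed code.
StatusCode status = layer.Forward(inputs, outputs);
if (status == StatusCode::kInferDimMismatch) {
  LOG(ERROR) << "input/output tensor array sizes do not match";
}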
2 changes: 1 addition & 1 deletion source/layer/details/activation.cpp
@@ -22,7 +22,7 @@ StatusCode ActivationForward(ActivationType type,
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the " + activation_type +
                  " layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  const uint32_t batch_size = inputs.size();
2 changes: 1 addition & 1 deletion source/layer/details/adaptive_avgpooling.cpp
@@ -48,7 +48,7 @@ StatusCode AdaptiveAveragePoolingLayer::Forward(
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the adaptive "
                  "pooling layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  if (!output_h_ || !output_w_) {
2 changes: 1 addition & 1 deletion source/layer/details/base_convolution.cpp
@@ -106,7 +106,7 @@ StatusCode BaseConvolutionLayer::Forward(const std::vector<std::shared_ptr<Tenso
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the convolution "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  if (weights_.empty()) {
2 changes: 1 addition & 1 deletion source/layer/details/batchnorm2d.cpp
@@ -41,7 +41,7 @@ StatusCode BatchNorm2dLayer::Forward(const std::vector<std::shared_ptr<Tensor<fl
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the batchnorm2d "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  const uint32_t mean_value_size = this->weights_.size();
2 changes: 1 addition & 1 deletion source/layer/details/cat.cpp
@@ -45,7 +45,7 @@ StatusCode CatLayer::Forward(const std::vector<std::shared_ptr<Tensor<float>>>&
  const uint32_t output_size = outputs.size();
  if (inputs.size() % output_size != 0) {
    LOG(ERROR) << "The input and output tensor array size of cat layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  const uint32_t packet_size = inputs.size() / output_size;
2 changes: 1 addition & 1 deletion source/layer/details/flatten.cpp
@@ -44,7 +44,7 @@ StatusCode FlattenLayer::Forward(const std::vector<std::shared_ptr<Tensor<float>
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the flatten "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  int32_t start_dim = start_dim_;
2 changes: 1 addition & 1 deletion source/layer/details/linear.cpp
@@ -55,7 +55,7 @@ StatusCode LinearLayer::Forward(const std::vector<std::shared_ptr<Tensor<float>>
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the linear "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  if (this->weights_.empty()) {
31 changes: 13 additions & 18 deletions source/layer/details/matmul.cpp
@@ -35,13 +35,6 @@ void LLamaMatmulLayer::set_weights(const std::vector<float>& weights) {

void LLamaMatmulLayer::set_weights(const std::vector<std::shared_ptr<Tensor<float>>>& weights) {
  CHECK(weights.size() == weights_.size());
-  for (uint32_t i = 0; i < weights.size(); ++i) {
-    if (this->weights_.at(i) != nullptr) {
-      CHECK(this->weights_.at(i)->rows() == weights.at(i)->rows());
-      CHECK(this->weights_.at(i)->cols() == weights.at(i)->cols());
-      CHECK(this->weights_.at(i)->channels() == weights.at(i)->channels());
-    }
-  }
  this->weights_ = weights;
}

@@ -60,7 +53,7 @@ StatusCode LLamaMatmulLayer::Forward(const std::vector<std::shared_ptr<Tensor<fl
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the matmul "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  if (this->weights_.empty()) {
@@ -75,15 +68,13 @@ StatusCode LLamaMatmulLayer::Forward(const std::vector<std::shared_ptr<Tensor<fl

  // w @ x
  uint32_t batch = inputs.size();
-  const std::shared_ptr<Tensor<float>>& weight = weights_.front();
-  arma::fmat weight_data(weight->raw_ptr(), weight_dim1_, weight_dim0_, false, true);  // wt
#pragma omp parallel for if (batch > 1) num_threads(batch)
  for (uint32_t i = 0; i < batch; ++i) {
    std::shared_ptr<Tensor<float>> input = inputs.at(i);
    CHECK(input != nullptr && !input->empty())
        << "The input tensor array in the matmul layer has an empty tensor " << i << " th";
    const std::vector<uint32_t>& input_shapes = input->raw_shapes();
-    CHECK(input_shapes.size() <= 2);
+    CHECK(!input_shapes.empty() && input_shapes.size() <= 2);

    uint32_t input_dim0 = 1;
    uint32_t input_dim1 = 1;
@@ -97,6 +88,7 @@ StatusCode LLamaMatmulLayer::Forward(const std::vector<std::shared_ptr<Tensor<fl

    // xt
    arma::fmat input_vec(input->raw_ptr(), input_dim1, input_dim0, false, true);
+    const std::shared_ptr<Tensor<float>>& weight = weights_.front();
    std::shared_ptr<Tensor<float>> output = outputs.at(i);
    if (output == nullptr || output->empty()) {
      output = std::make_shared<Tensor<float>>(1, input_dim1, weight_dim0_);
@@ -115,16 +107,19 @@ StatusCode LLamaMatmulLayer::Forward(const std::vector<std::shared_ptr<Tensor<fl
      float* output_ptr = output->raw_ptr();
      float* weight_ptr = weight->raw_ptr();
#pragma omp parallel for
-      for (int j = 0; j < weight_dim0_; ++j) {
+      for (int32_t j = 0; j < weight_dim0_; ++j) {
        arma::fmat sub_weight(weight_ptr + j * weight_dim1_, weight_dim1_, 1, false, true);
-        *(output_ptr + j) = as_scalar(input_vec * sub_weight);
+        *(output_ptr + j) = arma::as_scalar(input_vec * sub_weight);
      }
-    } else if (weight_dim0_ == 1) {
-      arma::fmat output_mat(output->raw_ptr(), input_dim1, weight_dim0_, false, true);
-      output_mat = input_vec * weight_data;
    } else {
-      arma::fmat output_mat(output->raw_ptr(), weight_dim0_, input_dim1, false, true);
-      output_mat = (input_vec * weight_data).t();
+      arma::fmat weight_data(weight->raw_ptr(), weight_dim1_, weight_dim0_, false, true);  // wt
+      if (weight_dim0_ == 1) {
+        arma::fmat output_mat(output->raw_ptr(), input_dim1, weight_dim0_, false, true);
+        output_mat = input_vec * weight_data;
+      } else {
+        arma::fmat output_mat(output->raw_ptr(), weight_dim0_, input_dim1, false, true);
+        output_mat = (input_vec * weight_data).t();
+      }
    }
  }
  return StatusCode::kSuccess;
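The restructured else-branch above leans on an Armadillo idiom worth spelling out: the aliasing constructor fmat(ptr, n_rows, n_cols, false, true) wraps a raw buffer without copying, and since Armadillo is column-major while these tensors are row-major, a row-major (r x c) buffer wrapped as (c x r) reads as the transpose. That is why the hunk computes (input_vec * weight_data).t(): it evaluates (x^T W^T)^T = W x with no layout conversion. A self-contained sketch, not from the commit, with made-up toy sizes and values:

#include <armadillo>
#include <vector>

int main() {
  // W is 2x3 and x is a length-3 vector, both stored row-major.
  std::vector<float> w = {1, 2, 3,
                          4, 5, 6};
  std::vector<float> x = {1, 1, 1};
  std::vector<float> y(2);

  // Aliasing constructor: copy_aux_mem = false (no copy), strict = true (fixed size).
  arma::fmat wt(w.data(), 3, 2, false, true);  // column-major view of W = W^T
  arma::fmat xt(x.data(), 1, 3, false, true);  // x as a row vector = x^T
  arma::fmat yt(y.data(), 1, 2, false, true);  // writes land directly in y

  yt = xt * wt;  // x^T W^T = (W x)^T, so y == {6, 15}
  yt.print("(W x)^T:");
  return 0;
}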
2 changes: 1 addition & 1 deletion source/layer/details/maxpooling.cpp
@@ -57,7 +57,7 @@ StatusCode MaxPoolingLayer::Forward(const std::vector<std::shared_ptr<Tensor<flo
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the maxpooling "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  if (!pooling_size_h_ || !pooling_size_w_) {
2 changes: 1 addition & 1 deletion source/layer/details/rms_norm.cpp
@@ -51,7 +51,7 @@ StatusCode RMSNormLayer::Forward(const std::vector<std::shared_ptr<Tensor<float>
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the rmsnorm "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  if (weights_.empty() || weights_.front()->empty()) {
2 changes: 1 addition & 1 deletion source/layer/details/softmax.cpp
@@ -46,7 +46,7 @@ StatusCode SoftmaxLayer::Forward(const std::vector<std::shared_ptr<Tensor<float>
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the softmax "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  const uint32_t batch_size = inputs.size();
2 changes: 1 addition & 1 deletion source/layer/details/upsample.cpp
@@ -74,7 +74,7 @@ StatusCode UpSampleLayer::Forward(const std::vector<std::shared_ptr<Tensor<float
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the upsample "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }
  uint32_t scale_w = static_cast<uint32_t>(scale_w_);
  uint32_t scale_h = static_cast<uint32_t>(scale_h_);
2 changes: 1 addition & 1 deletion source/layer/details/view.cpp
@@ -43,7 +43,7 @@ StatusCode ViewLayer::Forward(const std::vector<std::shared_ptr<Tensor<float>>>&
  if (inputs.size() != outputs.size()) {
    LOG(ERROR) << "The input and output tensor array size of the view "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  const uint32_t batch_size = inputs.size();
2 changes: 1 addition & 1 deletion source/layer/details/yolo_detect.cpp
@@ -60,7 +60,7 @@ StatusCode YoloDetectLayer::Forward(const std::vector<std::shared_ptr<Tensor<flo
  if (input_size / batch_size != stages_ || input_size % batch_size != 0) {
    LOG(ERROR) << "The input and output tensor array size of the yolo detect "
                  "layer do not match";
-    return StatusCode::kInferInOutShapeMismatch;
+    return StatusCode::kInferDimMismatch;
  }

  CHECK(!this->conv_layers_.empty() && this->conv_layers_.size() == stages)
