Merge pull request #349 from siemonchan/master

增加释放权重所占内存空间的接口
ztxz16 · Oct 17, 2023 · c458888 · c458888
2 parents c98d016 + 8742a07
commit c458888
Show file tree

Hide file tree

Showing 5 changed files with 29 additions and 1 deletion.
diff --git a/include/fastllm.h b/include/fastllm.h
@@ -415,6 +415,8 @@ namespace fastllm {
         void AddWeight(const std::string &key, const std::vector <int> &dims,
                        DataType dataType, WeightType weightType, DataType oriDataType, uint8_t *oriData); // Insert a weight
 
+        void ReleaseWeight(); // Release the space occupied by all weights
+
         void AddQLinearWeight(const std::string &key, const std::vector <int> &dims,
                               int bit, float *scales, uint8_t *oriData); // Insert a weight for a QLinear layer; quantization rule: float value = scales * oriData
 

diff --git a/include/models/basellm.h b/include/models/basellm.h
@@ -46,7 +46,9 @@ namespace fastllm {
     public:
         basellm() {}; // Default constructor; the body is empty
 
-        ~basellm() {};
+        // Virtual because this class declares virtual member functions, so an
+        // instance may be destroyed through a pointer to this base class; a
+        // non-virtual destructor would make that undefined behaviour.
+        virtual ~basellm() {
+            this->weight.ReleaseWeight(); // free the buffers held by the model's weights
+        };
 
         virtual void LoadFromFile(const std::string &fileName); // Load from a file
 

diff --git a/src/fastllm.cpp b/src/fastllm.cpp
@@ -1722,6 +1722,21 @@ namespace fastllm {
         }
     }
 
+    // Frees the buffers held by every entry of this->weight. The entries
+    // themselves are left in the container; only their data pointers are reset.
+    void WeightMap::ReleaseWeight() {
+        for (auto it = this->weight.begin(); it != this->weight.end(); ++it) {
+            Data &tensor = it->second;
+#ifndef USE_MMAP
+            // The host buffer is only deleted here when USE_MMAP is not defined.
+            delete[] tensor.cpuData;
+            tensor.cpuData = nullptr;
+#endif
+#ifdef USE_CUDA
+            // A non-null cudaData is handed to FastllmCudaDirectFree, then cleared.
+            if (tensor.cudaData != nullptr) {
+                FastllmCudaDirectFree(tensor.cudaData);
+                tensor.cudaData = nullptr;
+            }
+#endif
+        }
+    }
+
     // Returns a reference to the entry of `weight` stored under `key`.
     // NOTE(review): if `weight` is a std::map, a missing key is default-inserted here — confirm.
     Data &WeightMap::operator[](const std::string &key) {
         return weight[key];
     }

diff --git a/tools/fastllm_pytools/llm.py b/tools/fastllm_pytools/llm.py
@@ -345,3 +345,6 @@ def set_adapter(self, name: str):
 
     def disable_adapter(self):
         # Forwards to fastllm_lib.disable_adapter with this object's model handle.
         fastllm_lib.disable_adapter(self.model)
+
+    def release_memory(self):
+        # Forwards to fastllm_lib.release_memory with this object's model handle.
+        # NOTE(review): presumably bound to the exported C function of the same name,
+        # which releases the model's weight buffers — confirm before reusing the model afterwards.
+        fastllm_lib.release_memory(self.model)
diff --git a/tools/src/pytools.cpp b/tools/src/pytools.cpp
@@ -169,6 +169,12 @@ extern "C" {
         return;
     }
 
+    // Exported entry point: looks up the model registered under modelId and
+    // releases the buffers held by its weights.
+    DLL_EXPORT void release_memory(int modelId) {
+        models.GetModel(modelId)->weight.ReleaseWeight();
+    }
+
     DLL_EXPORT void init_params_llm_model(int modelId) {
         auto model = models.GetModel(modelId);
         model->InitParams();