Skip to content

Commit

Permalink
Merge pull request #349 from siemonchan/master
Browse files Browse the repository at this point in the history
增加释放权重所占内存空间的接口
  • Loading branch information
ztxz16 committed Oct 17, 2023
2 parents c98d016 + 8742a07 commit c458888
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 1 deletion.
2 changes: 2 additions & 0 deletions include/fastllm.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,8 @@ namespace fastllm {
// Inserts a weight into the map under `key`.
// NOTE(review): judging by the parameter names only, `dims` is the tensor shape, `dataType` the
// stored type, and `oriDataType`/`oriData` the type and buffer of the source data — confirm
// against the definition.
void AddWeight(const std::string &key, const std::vector <int> &dims,
DataType dataType, WeightType weightType, DataType oriDataType, uint8_t *oriData); // insert a weight

void ReleaseWeight(); // release the memory occupied by all weights

void AddQLinearWeight(const std::string &key, const std::vector <int> &dims,
int bit, float *scales, uint8_t *oriData); // insert the weight of a QLinear layer; quantization rule: float value = scales * oriData

Expand Down
4 changes: 3 additions & 1 deletion include/models/basellm.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ namespace fastllm {
public:
// Default constructor with an empty body.
basellm() {};

~basellm() {};
// Destructor: releases the buffers held by this model's weights.
// Declared virtual because basellm has virtual members (e.g. LoadFromFile) and is
// therefore used as a polymorphic base; without a virtual destructor, deleting a
// derived model through a basellm pointer is undefined behaviour.
virtual ~basellm() {
    this->weight.ReleaseWeight();
};

virtual void LoadFromFile(const std::string &fileName); // load from a file

Expand Down
15 changes: 15 additions & 0 deletions src/fastllm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1722,6 +1722,21 @@ namespace fastllm {
}
}

// Frees the data buffers of every weight stored in this map.
//  - Host side: unless USE_MMAP is defined, cpuData is released with delete[] and reset
//    to nullptr. With USE_MMAP defined, cpuData is left untouched here.
//  - Device side: when USE_CUDA is defined, a non-null cudaData is released through
//    FastllmCudaDirectFree and reset to nullptr.
// The entries themselves stay in the map; only their buffers are released. Resetting the
// pointers makes a second call a no-op for buffers that were already freed.
void WeightMap::ReleaseWeight() {
    for (auto it = this->weight.begin(); it != this->weight.end(); ++it) {
#ifndef USE_MMAP
        delete[] it->second.cpuData;
        it->second.cpuData = nullptr;
#endif
#ifdef USE_CUDA
        if (it->second.cudaData != nullptr) {
            FastllmCudaDirectFree(it->second.cudaData);
            it->second.cudaData = nullptr;
        }
#endif
    }
}

// Returns a reference to the weight stored under `key`, delegating to the underlying
// container's operator[].
// NOTE(review): if the container is a std::map-like type, a missing key is
// default-inserted rather than reported — confirm against the member's declaration.
Data &WeightMap::operator[](const std::string &key) {
    Data &entry = this->weight[key];
    return entry;
}
Expand Down
3 changes: 3 additions & 0 deletions tools/fastllm_pytools/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,3 +345,6 @@ def set_adapter(self, name: str):

def disable_adapter(self):
    """Forward to ``fastllm_lib.disable_adapter`` with this object's model handle."""
    model_handle = self.model
    fastllm_lib.disable_adapter(model_handle)

def release_memory(self):
    """Forward to ``fastllm_lib.release_memory`` with this object's model handle.

    NOTE(review): presumably this frees the model's weight buffers on the native
    side, so the model should not be used for inference afterwards -- confirm
    against the native implementation.
    """
    model_handle = self.model
    fastllm_lib.release_memory(model_handle)
6 changes: 6 additions & 0 deletions tools/src/pytools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ extern "C" {
return;
}

// Exported entry point: looks up the model registered under `modelId` and calls
// ReleaseWeight() on its `weight` member.
// NOTE(review): the result of GetModel is dereferenced without a check — confirm that
// GetModel cannot return a null handle for an unknown id.
DLL_EXPORT void release_memory(int modelId) {
    auto target = models.GetModel(modelId);
    target->weight.ReleaseWeight();
}

DLL_EXPORT void init_params_llm_model(int modelId) {
auto model = models.GetModel(modelId);
model->InitParams();
Expand Down

0 comments on commit c458888

Please sign in to comment.