From 20b256e0fd3bbc9771e5e52ae9ef9db20bd547e3 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Tue, 25 Mar 2025 14:29:22 -0400
Subject: [PATCH] convert : match ssm_conv tensors by type

---
 convert_hf_to_gguf.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 04800a94e..5d9c5f30b 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3803,8 +3803,6 @@ class MambaModel(Model):
     _tok_embd = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        del bid  # unused
-
         output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
         tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
 
@@ -3815,7 +3813,7 @@ class MambaModel(Model):
             data_torch = -torch.exp(data_torch)
 
         # [4 1 8192 1] -> [4 8192 1 1]
-        if new_name.endswith(".ssm_conv1d"):
+        if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid):
             data_torch = data_torch.squeeze()
 
         # assuming token_embd.weight is seen before output.weight