mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-04-16 11:36:08 +00:00
llama : support converting Mistral Small text-only (#12450)
This commit is contained in:
parent
c6af2161b2
commit
29fff308c7
@ -1747,6 +1747,25 @@ class LlamaModel(Model):
|
|||||||
raise ValueError(f"Unprocessed experts: {experts}")
|
raise ValueError(f"Unprocessed experts: {experts}")
|
||||||
|
|
||||||
|
|
||||||
|
@Model.register("Mistral3ForConditionalGeneration")
class Mistral3Model(LlamaModel):
    """Text-only conversion of ``Mistral3ForConditionalGeneration`` checkpoints.

    The checkpoint is exported with the LLAMA architecture. Settings nested
    under ``text_config`` are lifted to the root of the hyper-parameters, and
    tensors belonging to the vision tower or the multi-modal projector are
    dropped.
    """

    model_arch = gguf.MODEL_ARCH.LLAMA

    def __init__(self, *args, **kwargs):
        """Merge ``text_config`` into the root hparams, then defer to the parent.

        The model directory is read from the ``dir_model`` keyword argument,
        or from the first positional argument when the keyword is absent.

        Raises:
            KeyError: if the model directory is supplied neither by keyword
                nor positionally.
        """
        # Accept the directory positionally as well as by keyword; previously
        # a positional call failed with KeyError before reaching the parent.
        # NOTE(review): assumes the first positional parameter of the base
        # constructor is the model directory - confirm against Model.__init__.
        if args and "dir_model" not in kwargs:
            dir_model = args[0]
        else:
            dir_model = kwargs["dir_model"]

        hparams = Model.load_hparams(dir_model)
        if "text_config" in hparams:
            # we need to merge the text_config into the root level of hparams;
            # keys from text_config win over same-named root keys.
            hparams = {**hparams, **hparams["text_config"]}
            kwargs["hparams"] = hparams
        super().__init__(*args, **kwargs)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        """Map one checkpoint tensor, skipping the non-text components.

        Every ``"language_model."`` substring is removed from ``name`` before
        the parent implementation is called. Tensors whose name contains
        ``"multi_modal_projector"`` or ``"vision_tower"`` yield an empty list.
        """
        name = name.replace("language_model.", "")
        if "multi_modal_projector" in name or "vision_tower" in name:
            return []
        return super().modify_tensors(data_torch, name, bid)
|
||||||
|
|
||||||
|
|
||||||
@Model.register("DeciLMForCausalLM")
|
@Model.register("DeciLMForCausalLM")
|
||||||
class DeciModel(Model):
|
class DeciModel(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.DECI
|
model_arch = gguf.MODEL_ARCH.DECI
|
||||||
|
Loading…
x
Reference in New Issue
Block a user