```
BlueLMForCausalLM(
  (model): BlueLMModel(
    (embed_tokens): Embedding(100096, 4096, padding_idx=3)
    (embed_layer_norm): LayerNorm((4096,), eps=1e-06, elementwise_affine=True)
    (layers): ModuleList(
      (0-31): 32 x BlueLMDecoderLayer(
        (self_attn): BlueLMAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): BlueLMRotaryEmbedding()
        )
        (mlp): BlueLMMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (act_fn): SiLU()
          (dropout): Dropout(p=0, inplace=False)
        )
        (input_layernorm): BlueLMRMSNorm()
        (post_attention_layernorm): BlueLMRMSNorm()
      )
    )
    (norm): BlueLMRMSNorm()
  )
  (lm_head): Linear(in_features=4096, out_features=100096, bias=False)
)
```
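A printout like the one above comes from calling `print(model)` on a loaded checkpoint. Here is a minimal sketch of how to reproduce it, assuming the `vivo-ai/BlueLM-7B-Base` repo id on the Hugging Face Hub (an assumption, not stated in this page) and that the repo ships the custom `BlueLM*` module definitions, which is why `trust_remote_code=True` is needed:

```python
# Minimal sketch: load a BlueLM checkpoint and dump its module tree.
# The repo id "vivo-ai/BlueLM-7B-Base" is an assumption for illustration.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "vivo-ai/BlueLM-7B-Base",
    torch_dtype=torch.bfloat16,  # keep the 7B weights in half precision
    trust_remote_code=True,      # BlueLM uses custom modeling code from the repo
)
print(model)  # prints the nested module structure shown above
```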