diff --git a/download_models.py b/download_models.py new file mode 100644 index 0000000..9e4be06 --- /dev/null +++ b/download_models.py @@ -0,0 +1,13 @@ +from transformers import LlamaForCausalLM + +model_small = "meta-llama/Llama-2-7b-hf" +model_medium = "meta-llama/Llama-2-13b-hf" +model_large = "meta-llama/Llama-2-70b-hf" + + +model = LlamaForCausalLM.from_pretrained( + model_medium, + torch_dtype="auto", + cache_dir="/scratch/p490-24-t/all_llamas", + token="", +) diff --git a/llama.py b/llama.py index 7993a5b..8e4eb42 100644 --- a/llama.py +++ b/llama.py @@ -8,6 +8,7 @@ try: import wandb + has_wandb = True except: has_wandb = False @@ -15,20 +16,27 @@ def get_llama(model): import torch + def skip(*args, **kwargs): pass + torch.nn.init.kaiming_uniform_ = skip torch.nn.init.uniform_ = skip torch.nn.init.normal_ = skip from transformers import LlamaForCausalLM - model = LlamaForCausalLM.from_pretrained(model, torch_dtype='auto')#, cache_dir='/scratch/p490-24-t/llamas') + + model = LlamaForCausalLM.from_pretrained( + model, + torch_dtype="auto", + cache_dir="/scratch/p487-24-1/llamas", + ) model.seqlen = model.config.max_position_embeddings return model @torch.no_grad() def llama_sequential(model, dataloader, dev): - print("Starting...") + print(f"Starting... on device {dev}") use_cache = model.config.use_cache model.config.use_cache = False @@ -71,7 +79,6 @@ def forward(self, inp, **kwargs): outs = torch.zeros_like(inps) attention_mask = cache["attention_mask"] - if args.fix_mask: masks = {} for n, p in model.named_parameters(): @@ -82,11 +89,11 @@ def forward(self, inp, **kwargs): dim = shape_key[0] nnz = 0.1 if shape_key[0] == shape_key[1] else 0.2 print(n, p.shape, shape_key, nnz) - A = torch.eye(dim, device="cuda") + A = torch.eye(dim, device="cuda") Arand = torch.rand_like(A) Arand += A * 100 - thres = Arand.abs().flatten().sort()[0][int(A.numel() * (1-nnz))] - masks[shape_key] = (Arand.abs() > thres) + thres = Arand.abs().flatten().sort()[0][int(A.numel() * (1 - nnz))] + masks[shape_key] = Arand.abs() > thres print("Ready.") @@ -114,12 +121,16 @@ def forward(self, inp, **kwargs): not (args.minlayer <= i < args.maxlayer and args.prune_only in name) ) == (not args.invert): continue - + fixmask = None if args.fix_mask: - shape_key = min(subset[name].weight.shape), max(subset[name].weight.shape) + shape_key = min(subset[name].weight.shape), max( + subset[name].weight.shape + ) fixmask = masks[shape_key] - gpts[name] = DoubleSparse(subset[name], nofinal=args.no_final, fixmask=fixmask) + gpts[name] = DoubleSparse( + subset[name], nofinal=args.no_final, fixmask=fixmask + ) def add_batch(name): def tmp(_, inp, out): @@ -162,7 +173,7 @@ def tmp(_, inp, out): @torch.no_grad() -def llama_eval(model, testenc, dev, dataset: str, log_wandb: bool = False): +def llama_eval(model, testenc, dev, dataset: str, log_wandb: bool = False): print("Evaluating ...") testenc = testenc.input_ids @@ -320,9 +331,7 @@ def forward(self, inp, **kwargs): parser.add_argument( "--no-final", action="store_true", help="Do not run the finalizer." ) - parser.add_argument( - "--fix-mask", action="store_true", help="Keep one mask fixed." - ) + parser.add_argument("--fix-mask", action="store_true", help="Keep one mask fixed.") args = parser.parse_args() # init W&B logging @@ -330,11 +339,18 @@ def forward(self, inp, **kwargs): assert has_wandb, "wandb not installed try `pip install wandb`" wandb.init(config=args) + print(f"Running on dev: {DEV}") + print("loading llama") model = get_llama(args.model) + print("llama loaded") model.eval() dataloader, testloader = get_loaders( - args.dataset, nsamples=args.nsamples, seed=args.seed, model=args.model, seqlen=model.seqlen + args.dataset, + nsamples=args.nsamples, + seed=args.seed, + model=args.model, + seqlen=model.seqlen, ) if (args.sparsity or args.prunen) and not args.gmp: @@ -342,7 +358,7 @@ def forward(self, inp, **kwargs): llama_sequential(model, dataloader, DEV) for n, p in model.named_parameters(): print(n, torch.mean((p == 0).float())) - if 'down_proj' in n: + if "down_proj" in n: break print(time.time() - tick) diff --git a/logs/llama2-13-0.5 b/logs/llama2-13-0.5 new file mode 100644 index 0000000..7b55b0e --- /dev/null +++ b/logs/llama2-13-0.5 @@ -0,0 +1,2 @@ +Running on dev: cuda:0 +loading llama diff --git a/logs/llama2-70-0.5 b/logs/llama2-70-0.5 new file mode 100644 index 0000000..3ca622c --- /dev/null +++ b/logs/llama2-70-0.5 @@ -0,0 +1,4017 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10.717611312866211 +err_fin 4.021018981933594 +sparsity check 0.4999999701976776 +time 76.29 +0 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21.521377563476562 +err_fin 7.139569282531738 +sparsity check 0.4999997615814209 +time 1.34 +0 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2.3604230880737305 +err_fin 1.2943761348724365 +sparsity check 0.4999997615814209 +time 1.34 +0 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 0.3077867031097412 +err_fin 0.014042209833860397 +sparsity check 0.4999999701976776 +time 68.94 +0 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232.090087890625 +err_fin 70.949462890625 +sparsity check 0.49999999148505075 +time 138.88 +0 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232.13897705078125 +err_fin 70.3640365600586 +sparsity check 0.49999999148505075 +time 139.19 +0 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4.307583332061768 +err_fin 2.0438151359558105 +sparsity check 0.49999999148505075 +time 136.81 +1 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 202.37362670898438 +err_fin 59.84782791137695 +sparsity check 0.4999999701976776 +time 76.03 +1 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 191.2064666748047 +err_fin 68.62715148925781 +sparsity check 0.4999997615814209 +time 1.34 +1 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17.184864044189453 +err_fin 9.660581588745117 +sparsity check 0.4999997615814209 +time 1.34 +1 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10.127310752868652 +err_fin 1.458266258239746 +sparsity check 0.4999999701976776 +time 68.90 +1 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1592.7952880859375 +err_fin 427.54974365234375 +sparsity check 0.49999999148505075 +time 138.90 +1 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1730.3133544921875 +err_fin 450.73004150390625 +sparsity check 0.49999999148505075 +time 139.15 +1 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 102.43939208984375 +err_fin 80.88983154296875 +sparsity check 0.49999999148505075 +time 136.81 +2 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 689.7213134765625 +err_fin 288.656005859375 +sparsity check 0.4999999701976776 +time 75.94 +2 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 824.6632080078125 +err_fin 409.6326904296875 +sparsity check 0.4999997615814209 +time 1.35 +2 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 83.88253021240234 +err_fin 58.09417724609375 +sparsity check 0.4999997615814209 +time 1.36 +2 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 53.122962951660156 +err_fin 13.704061508178711 +sparsity check 0.4999999701976776 +time 68.95 +2 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 7269.6982421875 +err_fin 2657.683837890625 +sparsity check 0.49999999148505075 +time 138.88 +2 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 7611.2265625 +err_fin 2706.63330078125 +sparsity check 0.49999999148505075 +time 139.19 +2 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 372.63592529296875 +err_fin 314.4751892089844 +sparsity check 0.49999999148505075 +time 136.74 +3 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5286.748046875 +err_fin 2658.657958984375 +sparsity check 0.4999999701976776 +time 75.96 +3 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3639.0576171875 +err_fin 2397.865478515625 +sparsity check 0.4999997615814209 +time 1.36 +3 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1165.04296875 +err_fin 878.8387451171875 +sparsity check 0.4999997615814209 +time 1.35 +3 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 108.81578063964844 +err_fin 44.38947296142578 +sparsity check 0.4999999701976776 +time 68.97 +3 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 18334.98828125 +err_fin 8419.98828125 +sparsity check 0.49999999148505075 +time 138.87 +3 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 18495.703125 +err_fin 8321.044921875 +sparsity check 0.49999999148505075 +time 139.22 +3 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 570.5305786132812 +err_fin 501.5911865234375 +sparsity check 0.49999999148505075 +time 136.73 +4 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11264.912109375 +err_fin 6210.33642578125 +sparsity check 0.4999999701976776 +time 75.97 +4 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5589.6083984375 +err_fin 3927.994384765625 +sparsity check 0.4999997615814209 +time 1.35 +4 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2032.5537109375 +err_fin 1604.776611328125 +sparsity check 0.4999997615814209 +time 1.34 +4 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 128.72828674316406 +err_fin 56.752899169921875 +sparsity check 0.4999999701976776 +time 68.87 +4 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 32660.15234375 +err_fin 17234.7265625 +sparsity check 0.49999999148505075 +time 138.82 +4 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 32144.40625 +err_fin 16705.20703125 +sparsity check 0.49999999148505075 +time 139.15 +4 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 881.4537963867188 +err_fin 788.0779418945312 +sparsity check 0.49999999148505075 +time 136.71 +5 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16228.5498046875 +err_fin 9696.9482421875 +sparsity check 0.4999999701976776 +time 75.96 +5 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9036.0322265625 +err_fin 6727.912109375 +sparsity check 0.4999997615814209 +time 1.36 +5 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2971.5908203125 +err_fin 2443.595458984375 +sparsity check 0.4999997615814209 +time 1.36 +5 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 200.94137573242188 +err_fin 85.52928161621094 +sparsity check 0.4999999701976776 +time 68.92 +5 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 42693.5234375 +err_fin 24387.9921875 +sparsity check 0.49999999148505075 +time 138.83 +5 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 41738.5078125 +err_fin 23544.52734375 +sparsity check 0.49999999148505075 +time 139.19 +5 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1236.692138671875 +err_fin 1100.3291015625 +sparsity check 0.49999999148505075 +time 136.70 +6 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18688.27734375 +err_fin 11589.1328125 +sparsity check 0.4999999701976776 +time 75.97 +6 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10250.5146484375 +err_fin 7875.67724609375 +sparsity check 0.4999997615814209 +time 1.35 +6 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3508.52392578125 +err_fin 2945.929443359375 +sparsity check 0.4999997615814209 +time 1.34 +6 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 397.99334716796875 +err_fin 195.22512817382812 +sparsity check 0.4999999701976776 +time 68.90 +6 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 57998.7265625 +err_fin 34412.84375 +sparsity check 0.49999999148505075 +time 138.86 +6 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 56338.06640625 +err_fin 33092.6171875 +sparsity check 0.49999999148505075 +time 139.17 +6 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1841.37353515625 +err_fin 1657.0572509765625 +sparsity check 0.49999999148505075 +time 136.75 +7 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29829.900390625 +err_fin 19338.62109375 +sparsity check 0.4999999701976776 +time 75.95 +7 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15511.46875 +err_fin 12543.9169921875 +sparsity check 0.4999997615814209 +time 1.34 +7 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5050.56201171875 +err_fin 4345.7109375 +sparsity check 0.4999997615814209 +time 1.34 +7 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 480.6468200683594 +err_fin 224.02084350585938 +sparsity check 0.4999999701976776 +time 69.05 +7 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 73034.484375 +err_fin 45068.453125 +sparsity check 0.49999999148505075 +time 139.08 +7 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 70760.2421875 +err_fin 43282.4140625 +sparsity check 0.49999999148505075 +time 139.45 +7 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 2596.173828125 +err_fin 2334.00634765625 +sparsity check 0.49999999148505075 +time 136.99 +8 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 26124.1953125 +err_fin 17458.955078125 +sparsity check 0.4999999701976776 +time 75.99 +8 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13504.5673828125 +err_fin 10742.255859375 +sparsity check 0.4999997615814209 +time 1.54 +8 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4790.90869140625 +err_fin 4118.7841796875 +sparsity check 0.4999997615814209 +time 1.35 +8 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 817.05517578125 +err_fin 364.8235168457031 +sparsity check 0.4999999701976776 +time 68.93 +8 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 102645.140625 +err_fin 65863.28125 +sparsity check 0.49999999148505075 +time 139.05 +8 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 97679.3203125 +err_fin 62139.15625 +sparsity check 0.49999999148505075 +time 139.36 +8 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4634.08203125 +err_fin 4652.578125 +sparsity check 0.49999999148505075 +time 136.88 +9 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29451.58203125 +err_fin 20870.509765625 +sparsity check 0.4999999701976776 +time 75.93 +9 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15569.3203125 +err_fin 13249.1708984375 +sparsity check 0.4999997615814209 +time 1.37 +9 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7520.92578125 +err_fin 6713.357421875 +sparsity check 0.4999997615814209 +time 1.33 +9 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 462.6151123046875 +err_fin 199.6043701171875 +sparsity check 0.4999999701976776 +time 68.87 +9 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 134011.9375 +err_fin 89886.0 +sparsity check 0.49999999148505075 +time 139.05 +9 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 126603.546875 +err_fin 84247.96875 +sparsity check 0.49999999148505075 +time 139.44 +9 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4630.21630859375 +err_fin 4230.681640625 +sparsity check 0.49999999148505075 +time 136.93 +10 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17889.865234375 +err_fin 12942.79296875 +sparsity check 0.4999999701976776 +time 75.95 +10 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7444.3203125 +err_fin 6123.212890625 +sparsity check 0.4999997615814209 +time 1.35 +10 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7306.6875 +err_fin 6499.71875 +sparsity check 0.4999997615814209 +time 1.35 +10 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 357.70159912109375 +err_fin 163.88455200195312 +sparsity check 0.4999999701976776 +time 68.94 +10 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164816.0625 +err_fin 115543.453125 +sparsity check 0.49999999148505075 +time 139.09 +10 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 155024.53125 +err_fin 107871.078125 +sparsity check 0.49999999148505075 +time 139.41 +10 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 5262.95166015625 +err_fin 4857.232421875 +sparsity check 0.49999999148505075 +time 136.99 +11 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 28585.357421875 +err_fin 21026.21484375 +sparsity check 0.4999999701976776 +time 75.94 +11 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15701.8076171875 +err_fin 13197.6953125 +sparsity check 0.4999997615814209 +time 1.36 +11 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8526.1015625 +err_fin 7540.1748046875 +sparsity check 0.4999997615814209 +time 1.33 +11 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 553.4183959960938 +err_fin 248.75852966308594 +sparsity check 0.4999999701976776 +time 68.86 +11 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 178588.0625 +err_fin 127428.390625 +sparsity check 0.49999999148505075 +time 138.99 +11 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 168528.328125 +err_fin 119484.1875 +sparsity check 0.49999999148505075 +time 139.34 +11 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 5805.6396484375 +err_fin 5372.294921875 +sparsity check 0.49999999148505075 +time 136.90 +12 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 24431.125 +err_fin 17950.859375 +sparsity check 0.4999999701976776 +time 75.95 +12 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13860.294921875 +err_fin 11449.720703125 +sparsity check 0.4999997615814209 +time 1.37 +12 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 6270.4541015625 +err_fin 5479.6904296875 +sparsity check 0.4999997615814209 +time 1.34 +12 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 686.9627075195312 +err_fin 301.6113586425781 +sparsity check 0.4999999701976776 +time 68.94 +12 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 176493.125 +err_fin 126617.734375 +sparsity check 0.49999999148505075 +time 138.89 +12 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 168081.25 +err_fin 119958.796875 +sparsity check 0.49999999148505075 +time 139.24 +12 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 6426.07421875 +err_fin 5913.2431640625 +sparsity check 0.49999999148505075 +time 136.74 +13 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 31198.03125 +err_fin 23000.18359375 +sparsity check 0.4999999701976776 +time 75.98 +13 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14692.3046875 +err_fin 12144.748046875 +sparsity check 0.4999997615814209 +time 1.36 +13 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8522.2490234375 +err_fin 7549.24169921875 +sparsity check 0.4999997615814209 +time 1.34 +13 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1789.514404296875 +err_fin 919.1436767578125 +sparsity check 0.4999999701976776 +time 69.03 +13 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 186214.0 +err_fin 131577.75 +sparsity check 0.49999999148505075 +time 138.90 +13 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 179030.8125 +err_fin 125853.3984375 +sparsity check 0.49999999148505075 +time 139.32 +13 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 8078.4228515625 +err_fin 7425.3779296875 +sparsity check 0.49999999148505075 +time 136.75 +14 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50920.99609375 +err_fin 37752.6328125 +sparsity check 0.4999999701976776 +time 75.98 +14 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25162.892578125 +err_fin 21305.4609375 +sparsity check 0.4999997615814209 +time 1.35 +14 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11922.4052734375 +err_fin 10666.63671875 +sparsity check 0.4999997615814209 +time 1.33 +14 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2095.82421875 +err_fin 990.3251953125 +sparsity check 0.4999999701976776 +time 68.94 +14 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 216393.25 +err_fin 156643.15625 +sparsity check 0.49999999148505075 +time 138.93 +14 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 208099.84375 +err_fin 149852.75 +sparsity check 0.49999999148505075 +time 139.25 +14 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 8740.8046875 +err_fin 8084.6298828125 +sparsity check 0.49999999148505075 +time 136.73 +15 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 37790.51171875 +err_fin 28526.0625 +sparsity check 0.4999999701976776 +time 75.94 +15 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17737.49609375 +err_fin 15115.232421875 +sparsity check 0.4999997615814209 +time 1.35 +15 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14803.548828125 +err_fin 13282.87109375 +sparsity check 0.4999997615814209 +time 1.34 +15 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2026.078125 +err_fin 996.5897216796875 +sparsity check 0.4999999701976776 +time 68.91 +15 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232528.6875 +err_fin 169218.375 +sparsity check 0.49999999148505075 +time 138.86 +15 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 224502.9375 +err_fin 162470.671875 +sparsity check 0.49999999148505075 +time 139.25 +15 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 10293.46875 +err_fin 9477.0576171875 +sparsity check 0.49999999148505075 +time 136.77 +16 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36932.99609375 +err_fin 28156.908203125 +sparsity check 0.4999999701976776 +time 75.97 +16 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18068.9453125 +err_fin 15490.4482421875 +sparsity check 0.4999997615814209 +time 1.36 +16 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13673.15234375 +err_fin 12359.716796875 +sparsity check 0.4999997615814209 +time 1.34 +16 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1736.9384765625 +err_fin 855.127197265625 +sparsity check 0.4999999701976776 +time 68.89 +16 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 245166.546875 +err_fin 180737.34375 +sparsity check 0.49999999148505075 +time 138.89 +16 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 237606.0 +err_fin 174357.09375 +sparsity check 0.49999999148505075 +time 139.21 +16 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 10494.88671875 +err_fin 9731.7978515625 +sparsity check 0.49999999148505075 +time 136.77 +17 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47215.6171875 +err_fin 35621.890625 +sparsity check 0.4999999701976776 +time 76.00 +17 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 24034.9609375 +err_fin 20469.904296875 +sparsity check 0.4999997615814209 +time 1.36 +17 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11634.8115234375 +err_fin 10334.44921875 +sparsity check 0.4999997615814209 +time 1.35 +17 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2806.618896484375 +err_fin 1201.42431640625 +sparsity check 0.4999999701976776 +time 68.98 +17 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 241953.90625 +err_fin 176146.4375 +sparsity check 0.49999999148505075 +time 139.31 +17 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 235002.453125 +err_fin 170181.765625 +sparsity check 0.49999999148505075 +time 139.14 +17 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 13314.861328125 +err_fin 12041.2578125 +sparsity check 0.49999999148505075 +time 136.73 +18 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 44651.109375 +err_fin 33904.859375 +sparsity check 0.4999999701976776 +time 75.95 +18 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20965.923828125 +err_fin 18088.37890625 +sparsity check 0.4999997615814209 +time 1.36 +18 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18601.740234375 +err_fin 16901.2890625 +sparsity check 0.4999997615814209 +time 1.35 +18 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2928.0595703125 +err_fin 1358.638916015625 +sparsity check 0.4999999701976776 +time 69.09 +18 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 268416.65625 +err_fin 196345.8125 +sparsity check 0.49999999148505075 +time 138.90 +18 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 258956.84375 +err_fin 188393.03125 +sparsity check 0.49999999148505075 +time 139.17 +18 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 14657.4140625 +err_fin 13266.8935546875 +sparsity check 0.49999999148505075 +time 136.67 +19 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 53517.2109375 +err_fin 40694.8125 +sparsity check 0.4999999701976776 +time 75.94 +19 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25084.912109375 +err_fin 22073.0234375 +sparsity check 0.4999997615814209 +time 1.39 +19 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 22389.66015625 +err_fin 20554.13671875 +sparsity check 0.4999997615814209 +time 1.38 +19 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2164.009765625 +err_fin 1077.3140869140625 +sparsity check 0.4999999701976776 +time 68.83 +19 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 291777.65625 +err_fin 215023.40625 +sparsity check 0.49999999148505075 +time 138.84 +19 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 282239.6875 +err_fin 206979.453125 +sparsity check 0.49999999148505075 +time 139.14 +19 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 14946.92578125 +err_fin 13706.7490234375 +sparsity check 0.49999999148505075 +time 136.84 +20 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47994.28515625 +err_fin 36983.4296875 +sparsity check 0.4999999701976776 +time 75.99 +20 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21030.80078125 +err_fin 18505.185546875 +sparsity check 0.4999997615814209 +time 1.33 +20 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18498.4921875 +err_fin 17065.44140625 +sparsity check 0.4999997615814209 +time 1.34 +20 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1787.7657470703125 +err_fin 750.0384521484375 +sparsity check 0.4999999701976776 +time 68.86 +20 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 314702.21875 +err_fin 233465.40625 +sparsity check 0.49999999148505075 +time 138.91 +20 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 304739.125 +err_fin 224867.671875 +sparsity check 0.49999999148505075 +time 139.31 +20 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 15340.76171875 +err_fin 14158.5849609375 +sparsity check 0.49999999148505075 +time 136.75 +21 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30180.0546875 +err_fin 23343.21484375 +sparsity check 0.4999999701976776 +time 75.98 +21 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11323.00390625 +err_fin 9676.36328125 +sparsity check 0.4999997615814209 +time 1.57 +21 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15738.5107421875 +err_fin 14382.1328125 +sparsity check 0.4999997615814209 +time 1.34 +21 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2251.770751953125 +err_fin 1092.03955078125 +sparsity check 0.4999999701976776 +time 68.88 +21 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 329349.25 +err_fin 244980.453125 +sparsity check 0.49999999148505075 +time 138.87 +21 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 320610.8125 +err_fin 237296.0 +sparsity check 0.49999999148505075 +time 139.15 +21 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 15934.818359375 +err_fin 14806.017578125 +sparsity check 0.49999999148505075 +time 136.72 +22 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29016.478515625 +err_fin 22520.154296875 +sparsity check 0.4999999701976776 +time 75.99 +22 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13485.953125 +err_fin 11753.7607421875 +sparsity check 0.4999997615814209 +time 1.37 +22 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17941.8984375 +err_fin 16569.421875 +sparsity check 0.4999997615814209 +time 1.33 +22 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1523.11279296875 +err_fin 656.5509033203125 +sparsity check 0.4999999701976776 +time 68.92 +22 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 346120.71875 +err_fin 258356.25 +sparsity check 0.49999999148505075 +time 138.87 +22 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 337648.8125 +err_fin 250853.78125 +sparsity check 0.49999999148505075 +time 139.25 +22 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 16303.919921875 +err_fin 15231.833984375 +sparsity check 0.49999999148505075 +time 136.75 +23 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41636.0234375 +err_fin 32156.23828125 +sparsity check 0.4999999701976776 +time 76.00 +23 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21291.38671875 +err_fin 18641.78125 +sparsity check 0.4999997615814209 +time 1.58 +23 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18855.291015625 +err_fin 17419.83203125 +sparsity check 0.4999997615814209 +time 1.33 +23 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2125.40185546875 +err_fin 998.30517578125 +sparsity check 0.4999999701976776 +time 68.95 +23 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 355178.125 +err_fin 265911.59375 +sparsity check 0.49999999148505075 +time 138.88 +23 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 347161.5625 +err_fin 258682.9375 +sparsity check 0.49999999148505075 +time 138.89 +23 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 16935.09375 +err_fin 15803.7958984375 +sparsity check 0.49999999148505075 +time 136.36 +24 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36693.14453125 +err_fin 28276.1171875 +sparsity check 0.4999999701976776 +time 75.99 +24 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18554.3984375 +err_fin 16096.0712890625 +sparsity check 0.4999997615814209 +time 1.36 +24 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13973.544921875 +err_fin 12703.9609375 +sparsity check 0.4999997615814209 +time 1.34 +24 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2061.354736328125 +err_fin 965.4517211914062 +sparsity check 0.4999999701976776 +time 68.95 +24 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 343603.90625 +err_fin 257796.65625 +sparsity check 0.49999999148505075 +time 138.92 +24 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 338258.9375 +err_fin 252935.359375 +sparsity check 0.49999999148505075 +time 138.93 +24 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 17086.5078125 +err_fin 15960.8994140625 +sparsity check 0.49999999148505075 +time 136.82 +25 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41922.10546875 +err_fin 32402.29296875 +sparsity check 0.4999999701976776 +time 75.96 +25 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21058.921875 +err_fin 18029.76171875 +sparsity check 0.4999997615814209 +time 1.36 +25 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14476.837890625 +err_fin 13038.7958984375 +sparsity check 0.4999997615814209 +time 1.34 +25 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4923.3603515625 +err_fin 2420.1044921875 +sparsity check 0.4999999701976776 +time 68.93 +25 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 323719.625 +err_fin 236912.96875 +sparsity check 0.49999999148505075 +time 138.97 +25 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 320709.0625 +err_fin 233890.0625 +sparsity check 0.49999999148505075 +time 138.99 +25 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 22923.40625 +err_fin 21021.1875 +sparsity check 0.49999999148505075 +time 136.42 +26 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 61113.9140625 +err_fin 46681.4296875 +sparsity check 0.4999999701976776 +time 75.97 +26 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30957.59375 +err_fin 26995.5703125 +sparsity check 0.4999997615814209 +time 1.36 +26 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21901.88671875 +err_fin 19881.6875 +sparsity check 0.4999997615814209 +time 1.34 +26 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5200.109375 +err_fin 2634.781494140625 +sparsity check 0.4999999701976776 +time 68.87 +26 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 369029.5625 +err_fin 272602.0 +sparsity check 0.49999999148505075 +time 138.89 +26 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 364295.34375 +err_fin 268002.59375 +sparsity check 0.49999999148505075 +time 139.16 +26 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 23021.845703125 +err_fin 21498.466796875 +sparsity check 0.49999999148505075 +time 136.74 +27 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 28182.7890625 +err_fin 21998.55859375 +sparsity check 0.4999999701976776 +time 75.96 +27 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13437.1181640625 +err_fin 11674.521484375 +sparsity check 0.4999997615814209 +time 1.36 +27 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19074.416015625 +err_fin 17677.24609375 +sparsity check 0.4999997615814209 +time 1.35 +27 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2304.667724609375 +err_fin 1010.9876098632812 +sparsity check 0.4999999701976776 +time 69.01 +27 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 391651.625 +err_fin 291297.09375 +sparsity check 0.49999999148505075 +time 138.90 +27 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 386030.875 +err_fin 285705.375 +sparsity check 0.49999999148505075 +time 139.21 +27 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 24193.625 +err_fin 22671.08984375 +sparsity check 0.49999999148505075 +time 136.84 +28 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50516.8359375 +err_fin 39330.2578125 +sparsity check 0.4999999701976776 +time 75.96 +28 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26966.166015625 +err_fin 23884.134765625 +sparsity check 0.4999997615814209 +time 1.36 +28 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 28710.95703125 +err_fin 26541.1484375 +sparsity check 0.4999997615814209 +time 1.34 +28 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3617.527587890625 +err_fin 1803.8687744140625 +sparsity check 0.4999999701976776 +time 69.00 +28 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 413293.46875 +err_fin 309840.375 +sparsity check 0.49999999148505075 +time 138.85 +28 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 407833.6875 +err_fin 304104.65625 +sparsity check 0.49999999148505075 +time 139.19 +28 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 25484.3671875 +err_fin 23980.056640625 +sparsity check 0.49999999148505075 +time 136.76 +29 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50058.328125 +err_fin 39188.953125 +sparsity check 0.4999999701976776 +time 75.96 +29 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23116.28515625 +err_fin 20367.78515625 +sparsity check 0.4999997615814209 +time 1.35 +29 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31365.10546875 +err_fin 29057.857421875 +sparsity check 0.4999997615814209 +time 1.34 +29 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4273.2138671875 +err_fin 2122.2392578125 +sparsity check 0.4999999701976776 +time 68.89 +29 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 437865.5625 +err_fin 329919.40625 +sparsity check 0.49999999148505075 +time 138.91 +29 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 430689.4375 +err_fin 322876.5 +sparsity check 0.49999999148505075 +time 139.30 +29 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 26731.2890625 +err_fin 25263.79296875 +sparsity check 0.49999999148505075 +time 136.71 +30 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 26032.23828125 +err_fin 20426.23046875 +sparsity check 0.4999999701976776 +time 75.98 +30 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9992.404296875 +err_fin 8730.005859375 +sparsity check 0.4999997615814209 +time 1.52 +30 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29013.9921875 +err_fin 26994.16015625 +sparsity check 0.4999997615814209 +time 1.34 +30 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2086.343505859375 +err_fin 1034.9847412109375 +sparsity check 0.4999999701976776 +time 68.86 +30 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 464651.03125 +err_fin 352017.46875 +sparsity check 0.49999999148505075 +time 138.94 +30 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 455522.78125 +err_fin 343208.84375 +sparsity check 0.49999999148505075 +time 139.30 +30 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 27662.810546875 +err_fin 26182.16796875 +sparsity check 0.49999999148505075 +time 136.89 +31 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 52242.87890625 +err_fin 41066.94921875 +sparsity check 0.4999999701976776 +time 76.03 +31 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23725.396484375 +err_fin 20942.234375 +sparsity check 0.4999997615814209 +time 1.36 +31 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30603.60546875 +err_fin 28189.00390625 +sparsity check 0.4999997615814209 +time 1.35 +31 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4107.51171875 +err_fin 2171.7705078125 +sparsity check 0.4999999701976776 +time 68.91 +31 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 484873.3125 +err_fin 370143.375 +sparsity check 0.49999999148505075 +time 138.93 +31 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 474644.0625 +err_fin 360518.09375 +sparsity check 0.49999999148505075 +time 139.24 +31 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999850988388 0.4285714200564793 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218755977567 0.9709505944546686 1.0 +err_prefin 28457.56640625 +err_fin 26989.67578125 +sparsity check 0.4999999872275761 +time 136.82 +32 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 59466.06640625 +err_fin 46928.921875 +sparsity check 0.4999999701976776 +time 75.97 +32 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29597.052734375 +err_fin 26552.65234375 +sparsity check 0.4999997615814209 +time 1.62 +32 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 27953.755859375 +err_fin 25990.2265625 +sparsity check 0.4999997615814209 +time 1.34 +32 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2870.14453125 +err_fin 1423.8023681640625 +sparsity check 0.4999999701976776 +time 68.91 +32 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 496590.8125 +err_fin 378051.59375 +sparsity check 0.49999999148505075 +time 138.96 +32 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 487239.75 +err_fin 369091.21875 +sparsity check 0.49999999148505075 +time 139.26 +32 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 30070.58984375 +err_fin 28461.32421875 +sparsity check 0.49999999148505075 +time 136.91 +33 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 83937.4140625 +err_fin 65693.96875 +sparsity check 0.4999999701976776 +time 75.92 +33 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39462.5234375 +err_fin 35009.5546875 +sparsity check 0.4999997615814209 +time 1.35 +33 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30598.1484375 +err_fin 28102.85546875 +sparsity check 0.4999997615814209 +time 1.34 +33 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5600.32373046875 +err_fin 2763.80712890625 +sparsity check 0.4999999701976776 +time 68.86 +33 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 502270.46875 +err_fin 377882.3125 +sparsity check 0.49999999148505075 +time 138.87 +33 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 494249.8125 +err_fin 370007.4375 +sparsity check 0.49999999148505075 +time 139.20 +33 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 34105.328125 +err_fin 32116.03125 +sparsity check 0.49999999148505075 +time 136.67 +34 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30010.17578125 +err_fin 23505.6015625 +sparsity check 0.4999999701976776 +time 75.99 +34 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 12229.2001953125 +err_fin 10336.8369140625 +sparsity check 0.4999997615814209 +time 1.34 +34 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18207.46875 +err_fin 16517.2109375 +sparsity check 0.4999997615814209 +time 1.33 +34 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3952.8564453125 +err_fin 2030.096923828125 +sparsity check 0.4999999701976776 +time 68.91 +34 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 517865.46875 +err_fin 391603.0 +sparsity check 0.49999999148505075 +time 139.05 +34 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 509327.125 +err_fin 383224.625 +sparsity check 0.49999999148505075 +time 139.37 +34 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 37172.03125 +err_fin 35148.7734375 +sparsity check 0.49999999148505075 +time 136.87 +35 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 49801.08203125 +err_fin 39204.73046875 +sparsity check 0.4999999701976776 +time 76.02 +35 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20728.83203125 +err_fin 18156.15625 +sparsity check 0.4999997615814209 +time 1.36 +35 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25123.96484375 +err_fin 23182.5234375 +sparsity check 0.4999997615814209 +time 1.33 +35 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4984.70703125 +err_fin 2389.52978515625 +sparsity check 0.4999999701976776 +time 68.92 +35 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 549537.625 +err_fin 416325.875 +sparsity check 0.49999999148505075 +time 139.09 +35 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 539814.0 +err_fin 406956.34375 +sparsity check 0.49999999148505075 +time 139.37 +35 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 40008.2109375 +err_fin 37887.08203125 +sparsity check 0.49999999148505075 +time 136.92 +36 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47921.19921875 +err_fin 37761.7421875 +sparsity check 0.4999999701976776 +time 76.00 +36 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20009.556640625 +err_fin 17598.63671875 +sparsity check 0.4999997615814209 +time 1.37 +36 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 24585.205078125 +err_fin 22760.03125 +sparsity check 0.4999997615814209 +time 1.33 +36 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3131.942138671875 +err_fin 1448.037353515625 +sparsity check 0.4999999701976776 +time 68.94 +36 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 561929.125 +err_fin 425099.125 +sparsity check 0.49999999148505075 +time 138.92 +36 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 552476.0625 +err_fin 415964.1875 +sparsity check 0.49999999148505075 +time 139.33 +36 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 44609.0078125 +err_fin 42124.6953125 +sparsity check 0.49999999148505075 +time 136.74 +37 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 74911.2265625 +err_fin 58578.91796875 +sparsity check 0.4999999701976776 +time 75.96 +37 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34300.6171875 +err_fin 30461.64453125 +sparsity check 0.4999997615814209 +time 1.37 +37 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31333.7265625 +err_fin 29009.8125 +sparsity check 0.4999997615814209 +time 1.34 +37 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6033.38232421875 +err_fin 2680.28857421875 +sparsity check 0.4999999701976776 +time 68.97 +37 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 586511.5625 +err_fin 440298.9375 +sparsity check 0.49999999148505075 +time 138.95 +37 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 576712.9375 +err_fin 430935.0625 +sparsity check 0.49999999148505075 +time 138.94 +37 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 48698.3515625 +err_fin 45944.84375 +sparsity check 0.49999999148505075 +time 136.38 +38 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 84190.71875 +err_fin 65235.796875 +sparsity check 0.4999999701976776 +time 76.00 +38 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38048.1484375 +err_fin 33385.6640625 +sparsity check 0.4999997615814209 +time 1.37 +38 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 32509.40234375 +err_fin 29642.828125 +sparsity check 0.4999997615814209 +time 1.34 +38 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 9220.7138671875 +err_fin 4203.4375 +sparsity check 0.4999999701976776 +time 68.99 +38 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 603327.75 +err_fin 451749.90625 +sparsity check 0.49999999148505075 +time 138.93 +38 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 594840.375 +err_fin 443057.9375 +sparsity check 0.49999999148505075 +time 139.26 +38 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 54537.9375 +err_fin 51356.5625 +sparsity check 0.49999999148505075 +time 136.76 +39 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 102629.9296875 +err_fin 79059.234375 +sparsity check 0.4999999701976776 +time 75.96 +39 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 51605.64453125 +err_fin 45724.78125 +sparsity check 0.4999997615814209 +time 1.35 +39 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35260.58203125 +err_fin 32227.84375 +sparsity check 0.4999997615814209 +time 1.34 +39 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11472.75390625 +err_fin 4916.529296875 +sparsity check 0.4999999701976776 +time 68.94 +39 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 608882.0625 +err_fin 449760.6875 +sparsity check 0.49999999148505075 +time 138.93 +39 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 600752.25 +err_fin 441877.0 +sparsity check 0.49999999148505075 +time 138.89 +39 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 66603.75 +err_fin 61970.1328125 +sparsity check 0.49999999148505075 +time 136.62 +40 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 95904.0 +err_fin 72537.21875 +sparsity check 0.4999999701976776 +time 75.98 +40 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44928.734375 +err_fin 39576.1484375 +sparsity check 0.4999997615814209 +time 1.36 +40 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39848.4140625 +err_fin 35984.0078125 +sparsity check 0.4999997615814209 +time 1.34 +40 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16308.015625 +err_fin 8537.3857421875 +sparsity check 0.4999999701976776 +time 68.97 +40 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 631623.625 +err_fin 458495.96875 +sparsity check 0.49999999148505075 +time 138.92 +40 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 622759.1875 +err_fin 449699.3125 +sparsity check 0.49999999148505075 +time 139.24 +40 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 74991.9921875 +err_fin 69863.890625 +sparsity check 0.49999999148505075 +time 136.85 +41 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 77544.609375 +err_fin 58233.296875 +sparsity check 0.4999999701976776 +time 75.95 +41 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34891.703125 +err_fin 30307.580078125 +sparsity check 0.4999997615814209 +time 1.51 +41 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35122.8515625 +err_fin 31752.765625 +sparsity check 0.4999997615814209 +time 1.32 +41 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17705.14453125 +err_fin 8125.5283203125 +sparsity check 0.4999999701976776 +time 68.88 +41 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 658906.0 +err_fin 467518.375 +sparsity check 0.49999999148505075 +time 138.89 +41 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 639130.25 +err_fin 450139.3125 +sparsity check 0.49999999148505075 +time 139.26 +41 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 87729.15625 +err_fin 80864.71875 +sparsity check 0.49999999148505075 +time 136.80 +42 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 89561.28125 +err_fin 65998.328125 +sparsity check 0.4999999701976776 +time 75.95 +42 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37875.28125 +err_fin 33061.8125 +sparsity check 0.4999997615814209 +time 1.56 +42 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43199.59375 +err_fin 39259.875 +sparsity check 0.4999997615814209 +time 1.34 +42 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17064.9765625 +err_fin 8395.009765625 +sparsity check 0.4999999701976776 +time 68.94 +42 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 712042.875 +err_fin 499760.4375 +sparsity check 0.49999999148505075 +time 138.91 +42 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 675021.0 +err_fin 470063.28125 +sparsity check 0.49999999148505075 +time 139.32 +42 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 96181.5859375 +err_fin 88354.796875 +sparsity check 0.49999999148505075 +time 136.60 +43 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 62440.1484375 +err_fin 45790.125 +sparsity check 0.4999999701976776 +time 75.95 +43 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30658.6875 +err_fin 26492.27734375 +sparsity check 0.4999997615814209 +time 1.57 +43 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33607.234375 +err_fin 30055.046875 +sparsity check 0.4999997615814209 +time 1.38 +43 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11535.37109375 +err_fin 6193.34130859375 +sparsity check 0.4999999701976776 +time 68.88 +43 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 740046.75 +err_fin 516331.59375 +sparsity check 0.49999999148505075 +time 138.91 +43 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 695492.625 +err_fin 481200.125 +sparsity check 0.49999999148505075 +time 138.88 +43 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 106719.265625 +err_fin 97812.765625 +sparsity check 0.49999999148505075 +time 136.83 +44 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 112646.9375 +err_fin 81647.5625 +sparsity check 0.4999999701976776 +time 75.94 +44 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40573.47265625 +err_fin 35626.34375 +sparsity check 0.4999997615814209 +time 1.49 +44 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 52956.95703125 +err_fin 48484.390625 +sparsity check 0.4999997615814209 +time 1.35 +44 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 27315.21875 +err_fin 14398.6630859375 +sparsity check 0.4999999701976776 +time 68.88 +44 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 811938.8125 +err_fin 557512.125 +sparsity check 0.49999999148505075 +time 138.90 +44 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 737602.0 +err_fin 500840.0625 +sparsity check 0.49999999148505075 +time 139.20 +44 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 124876.71875 +err_fin 112474.78125 +sparsity check 0.49999999148505075 +time 136.65 +45 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 70487.9375 +err_fin 50279.3671875 +sparsity check 0.4999999701976776 +time 76.01 +45 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 32180.87109375 +err_fin 27765.591796875 +sparsity check 0.4999997615814209 +time 1.37 +45 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 51670.9296875 +err_fin 47043.04296875 +sparsity check 0.4999997615814209 +time 1.36 +45 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8514.8125 +err_fin 4562.552734375 +sparsity check 0.4999999701976776 +time 68.86 +45 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 880760.625 +err_fin 604341.5 +sparsity check 0.49999999148505075 +time 138.91 +45 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 790777.375 +err_fin 536047.125 +sparsity check 0.49999999148505075 +time 138.94 +45 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 129625.2890625 +err_fin 116981.046875 +sparsity check 0.49999999148505075 +time 136.81 +46 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 60139.3671875 +err_fin 42844.4375 +sparsity check 0.4999999701976776 +time 75.94 +46 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26271.033203125 +err_fin 22923.193359375 +sparsity check 0.4999997615814209 +time 1.38 +46 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44671.41015625 +err_fin 41058.53125 +sparsity check 0.4999997615814209 +time 1.32 +46 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10887.734375 +err_fin 5889.4287109375 +sparsity check 0.4999999701976776 +time 68.92 +46 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 946446.625 +err_fin 652435.25 +sparsity check 0.49999999148505075 +time 138.85 +46 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 836977.1875 +err_fin 570130.125 +sparsity check 0.49999999148505075 +time 139.17 +46 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 130404.3125 +err_fin 118459.3828125 +sparsity check 0.49999999148505075 +time 136.73 +47 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 81630.78125 +err_fin 58640.24609375 +sparsity check 0.4999999701976776 +time 75.99 +47 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37684.5 +err_fin 32753.998046875 +sparsity check 0.4999997615814209 +time 1.56 +47 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43192.5078125 +err_fin 39257.546875 +sparsity check 0.4999997615814209 +time 1.34 +47 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17707.228515625 +err_fin 7706.35009765625 +sparsity check 0.4999999701976776 +time 69.01 +47 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 982978.0 +err_fin 670679.625 +sparsity check 0.49999999148505075 +time 138.97 +47 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 857838.8125 +err_fin 577950.375 +sparsity check 0.49999999148505075 +time 139.26 +47 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 146335.8125 +err_fin 131420.78125 +sparsity check 0.49999999148505075 +time 136.71 +48 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41655.484375 +err_fin 29628.703125 +sparsity check 0.4999999701976776 +time 75.99 +48 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14035.11328125 +err_fin 11946.5283203125 +sparsity check 0.4999997615814209 +time 1.51 +48 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 42893.5 +err_fin 39142.015625 +sparsity check 0.4999997615814209 +time 1.35 +48 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11646.501953125 +err_fin 6169.419921875 +sparsity check 0.4999999701976776 +time 68.97 +48 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1029321.1875 +err_fin 699118.0 +sparsity check 0.49999999148505075 +time 138.92 +48 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 894293.875 +err_fin 599440.0 +sparsity check 0.49999999148505075 +time 139.21 +48 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 146734.140625 +err_fin 132421.6875 +sparsity check 0.49999999148505075 +time 136.81 +49 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23678.34765625 +err_fin 16803.919921875 +sparsity check 0.4999999701976776 +time 75.93 +49 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9243.3369140625 +err_fin 7814.763671875 +sparsity check 0.4999997615814209 +time 1.34 +49 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34349.76953125 +err_fin 30772.12890625 +sparsity check 0.4999997615814209 +time 1.34 +49 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4523.0859375 +err_fin 2152.568603515625 +sparsity check 0.4999999701976776 +time 68.84 +49 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1071935.5 +err_fin 728369.125 +sparsity check 0.49999999148505075 +time 138.89 +49 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 929909.875 +err_fin 623808.25 +sparsity check 0.49999999148505075 +time 139.17 +49 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 147430.875 +err_fin 133642.25 +sparsity check 0.49999999148505075 +time 136.77 +50 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 37368.7265625 +err_fin 26441.6953125 +sparsity check 0.4999999701976776 +time 76.00 +50 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 12596.9892578125 +err_fin 10804.3369140625 +sparsity check 0.4999997615814209 +time 1.52 +50 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44531.25 +err_fin 40123.015625 +sparsity check 0.4999997615814209 +time 1.34 +50 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8514.12890625 +err_fin 4372.64111328125 +sparsity check 0.4999999701976776 +time 68.95 +50 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1113480.625 +err_fin 758171.3125 +sparsity check 0.49999999148505075 +time 138.95 +50 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 962699.9375 +err_fin 647435.0 +sparsity check 0.49999999148505075 +time 139.22 +50 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148243.84375 +err_fin 134802.15625 +sparsity check 0.49999999148505075 +time 136.84 +51 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47897.08203125 +err_fin 33948.296875 +sparsity check 0.4999999701976776 +time 75.99 +51 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19351.279296875 +err_fin 16782.765625 +sparsity check 0.4999997615814209 +time 1.57 +51 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 46854.2265625 +err_fin 42734.59765625 +sparsity check 0.4999997615814209 +time 1.34 +51 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7749.6162109375 +err_fin 3622.430419921875 +sparsity check 0.4999999701976776 +time 68.89 +51 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1145907.5 +err_fin 782892.875 +sparsity check 0.49999999148505075 +time 138.88 +51 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 993129.375 +err_fin 670268.75 +sparsity check 0.49999999148505075 +time 139.17 +51 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148951.9375 +err_fin 135756.546875 +sparsity check 0.49999999148505075 +time 136.77 +52 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 80784.09375 +err_fin 57475.87890625 +sparsity check 0.4999999701976776 +time 76.02 +52 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25809.490234375 +err_fin 22641.80859375 +sparsity check 0.4999997615814209 +time 1.34 +52 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53536.453125 +err_fin 48914.1953125 +sparsity check 0.4999997615814209 +time 1.33 +52 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15624.708984375 +err_fin 8132.90966796875 +sparsity check 0.4999999701976776 +time 68.91 +52 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1177332.25 +err_fin 808526.6875 +sparsity check 0.49999999148505075 +time 138.95 +52 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1019252.5 +err_fin 691205.625 +sparsity check 0.49999999148505075 +time 139.23 +52 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 152510.71875 +err_fin 139273.8125 +sparsity check 0.49999999148505075 +time 136.73 +53 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 25649.494140625 +err_fin 18411.953125 +sparsity check 0.4999999701976776 +time 75.99 +53 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7535.6396484375 +err_fin 6428.8427734375 +sparsity check 0.4999997615814209 +time 1.37 +53 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44588.1484375 +err_fin 40414.9140625 +sparsity check 0.4999997615814209 +time 1.34 +53 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6829.2353515625 +err_fin 3247.128173828125 +sparsity check 0.4999999701976776 +time 68.89 +53 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1209166.625 +err_fin 829818.75 +sparsity check 0.49999999148505075 +time 138.86 +53 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1049038.875 +err_fin 711064.875 +sparsity check 0.49999999148505075 +time 139.17 +53 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 154057.25 +err_fin 140811.40625 +sparsity check 0.49999999148505075 +time 136.69 +54 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29969.533203125 +err_fin 21441.796875 +sparsity check 0.4999999701976776 +time 75.96 +54 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8616.0869140625 +err_fin 7288.6494140625 +sparsity check 0.4999997615814209 +time 1.36 +54 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 45932.27734375 +err_fin 41775.0546875 +sparsity check 0.4999997615814209 +time 1.34 +54 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7486.509765625 +err_fin 3676.146484375 +sparsity check 0.4999999701976776 +time 68.88 +54 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1239661.625 +err_fin 853401.875 +sparsity check 0.49999999148505075 +time 138.85 +54 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1077716.75 +err_fin 732870.1875 +sparsity check 0.49999999148505075 +time 139.19 +54 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 154116.6875 +err_fin 141300.8125 +sparsity check 0.49999999148505075 +time 136.87 +55 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 48705.125 +err_fin 34947.796875 +sparsity check 0.4999999701976776 +time 75.95 +55 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 16973.052734375 +err_fin 14659.681640625 +sparsity check 0.4999997615814209 +time 1.38 +55 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 59642.6875 +err_fin 54076.9296875 +sparsity check 0.4999997615814209 +time 1.38 +55 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14595.42578125 +err_fin 6802.30859375 +sparsity check 0.4999999701976776 +time 68.94 +55 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1263997.25 +err_fin 872225.25 +sparsity check 0.49999999148505075 +time 138.89 +55 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1107316.75 +err_fin 755348.75 +sparsity check 0.49999999148505075 +time 139.25 +55 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 157797.8125 +err_fin 145139.59375 +sparsity check 0.49999999148505075 +time 136.74 +56 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36475.2578125 +err_fin 26326.166015625 +sparsity check 0.4999999701976776 +time 75.96 +56 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13547.791015625 +err_fin 11655.0546875 +sparsity check 0.4999997615814209 +time 1.36 +56 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 42385.390625 +err_fin 38619.52734375 +sparsity check 0.4999997615814209 +time 1.33 +56 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7914.76416015625 +err_fin 3646.15185546875 +sparsity check 0.4999999701976776 +time 68.93 +56 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1286053.75 +err_fin 887569.0625 +sparsity check 0.49999999148505075 +time 138.89 +56 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1129318.25 +err_fin 770670.5 +sparsity check 0.49999999148505075 +time 139.21 +56 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 160301.1875 +err_fin 147516.8125 +sparsity check 0.49999999148505075 +time 136.81 +57 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 33035.65625 +err_fin 23655.130859375 +sparsity check 0.4999999701976776 +time 75.92 +57 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10187.09765625 +err_fin 8785.1015625 +sparsity check 0.4999997615814209 +time 1.36 +57 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 54517.59375 +err_fin 49678.390625 +sparsity check 0.4999997615814209 +time 1.32 +57 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8301.5009765625 +err_fin 4029.38525390625 +sparsity check 0.4999999701976776 +time 68.92 +57 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1327755.375 +err_fin 918436.375 +sparsity check 0.49999999148505075 +time 138.88 +57 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1167561.625 +err_fin 799117.75 +sparsity check 0.49999999148505075 +time 139.09 +57 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999701976776 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218706168299 0.9709505944546686 1.0 +err_prefin 163075.703125 +err_fin 150484.9375 +sparsity check 0.4999999872275761 +time 136.73 +58 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18900.6484375 +err_fin 13654.41015625 +sparsity check 0.4999999701976776 +time 75.92 +58 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5345.603515625 +err_fin 4459.5361328125 +sparsity check 0.4999997615814209 +time 1.36 +58 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39029.65234375 +err_fin 35217.87109375 +sparsity check 0.4999997615814209 +time 1.35 +58 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5688.50634765625 +err_fin 2834.5869140625 +sparsity check 0.4999999701976776 +time 68.87 +58 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1357102.125 +err_fin 942058.375 +sparsity check 0.49999999148505075 +time 138.87 +58 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1195968.75 +err_fin 821476.5 +sparsity check 0.49999999148505075 +time 139.18 +58 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164241.65625 +err_fin 151835.625 +sparsity check 0.49999999148505075 +time 136.75 +59 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23973.05859375 +err_fin 17286.46875 +sparsity check 0.4999999701976776 +time 75.95 +59 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 6800.9345703125 +err_fin 5795.3671875 +sparsity check 0.4999997615814209 +time 1.36 +59 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43333.12109375 +err_fin 38987.5859375 +sparsity check 0.4999997615814209 +time 1.34 +59 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5351.494140625 +err_fin 2592.425537109375 +sparsity check 0.4999999701976776 +time 68.90 +59 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1390754.75 +err_fin 968292.125 +sparsity check 0.49999999148505075 +time 138.90 +59 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1228030.375 +err_fin 846357.375 +sparsity check 0.49999999148505075 +time 138.88 +59 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164491.46875 +err_fin 152553.453125 +sparsity check 0.49999999148505075 +time 136.36 +60 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4144.17578125 +err_fin 2987.91259765625 +sparsity check 0.4999999701976776 +time 75.95 +60 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1082.2666015625 +err_fin 884.3472900390625 +sparsity check 0.4999997615814209 +time 1.36 +60 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23628.416015625 +err_fin 21147.66015625 +sparsity check 0.4999997615814209 +time 1.33 +60 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3283.666015625 +err_fin 1680.838134765625 +sparsity check 0.4999999701976776 +time 68.87 +60 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1413586.75 +err_fin 988726.5 +sparsity check 0.49999999148505075 +time 138.83 +60 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1253862.875 +err_fin 868783.25 +sparsity check 0.49999999148505075 +time 139.15 +60 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164164.6875 +err_fin 152643.6875 +sparsity check 0.49999999148505075 +time 136.74 +61 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15371.099609375 +err_fin 11208.525390625 +sparsity check 0.4999999701976776 +time 75.96 +61 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4000.73388671875 +err_fin 3378.706787109375 +sparsity check 0.4999997615814209 +time 1.38 +61 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40129.45703125 +err_fin 36481.453125 +sparsity check 0.4999997615814209 +time 1.34 +61 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5390.501953125 +err_fin 2717.38037109375 +sparsity check 0.4999999701976776 +time 68.89 +61 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1440616.0 +err_fin 1010316.6875 +sparsity check 0.49999999148505075 +time 138.89 +61 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1284281.875 +err_fin 891866.625 +sparsity check 0.49999999148505075 +time 138.90 +61 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164991.875 +err_fin 153754.4375 +sparsity check 0.49999999148505075 +time 136.36 +62 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16652.97265625 +err_fin 12181.880859375 +sparsity check 0.4999999701976776 +time 75.96 +62 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4841.4033203125 +err_fin 4092.40771484375 +sparsity check 0.4999997615814209 +time 1.34 +62 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43296.96484375 +err_fin 39043.84375 +sparsity check 0.4999997615814209 +time 1.36 +62 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4511.126953125 +err_fin 2035.7476806640625 +sparsity check 0.4999999701976776 +time 68.90 +62 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1472194.625 +err_fin 1036582.5 +sparsity check 0.49999999148505075 +time 138.91 +62 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1319016.75 +err_fin 919966.0 +sparsity check 0.49999999148505075 +time 139.19 +62 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 166857.40625 +err_fin 155675.09375 +sparsity check 0.49999999148505075 +time 136.79 +63 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15162.9658203125 +err_fin 11185.603515625 +sparsity check 0.4999999701976776 +time 75.93 +63 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3691.4169921875 +err_fin 3106.723388671875 +sparsity check 0.4999997615814209 +time 1.59 +63 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33136.32421875 +err_fin 29420.41796875 +sparsity check 0.4999997615814209 +time 1.37 +63 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3492.001708984375 +err_fin 1740.047119140625 +sparsity check 0.4999999701976776 +time 68.85 +63 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1507286.125 +err_fin 1064654.25 +sparsity check 0.49999999148505075 +time 138.91 +63 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1352012.125 +err_fin 947054.25 +sparsity check 0.49999999148505075 +time 138.84 +63 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 169400.21875 +err_fin 158286.328125 +sparsity check 0.49999999148505075 +time 136.67 +64 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 27923.83984375 +err_fin 20545.359375 +sparsity check 0.4999999701976776 +time 75.98 +64 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8380.58984375 +err_fin 7131.638671875 +sparsity check 0.4999997615814209 +time 1.35 +64 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 49114.46484375 +err_fin 44746.3828125 +sparsity check 0.4999997615814209 +time 1.35 +64 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7756.88671875 +err_fin 3477.064453125 +sparsity check 0.4999999701976776 +time 68.85 +64 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1537956.125 +err_fin 1089393.5 +sparsity check 0.49999999148505075 +time 138.85 +64 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1384552.125 +err_fin 972593.1875 +sparsity check 0.49999999148505075 +time 139.24 +64 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 172489.484375 +err_fin 161310.234375 +sparsity check 0.49999999148505075 +time 136.71 +65 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7846.609375 +err_fin 5812.009765625 +sparsity check 0.4999999701976776 +time 75.97 +65 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1819.07470703125 +err_fin 1485.857177734375 +sparsity check 0.4999997615814209 +time 1.34 +65 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 27047.845703125 +err_fin 24197.2421875 +sparsity check 0.4999997615814209 +time 1.33 +65 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3190.85546875 +err_fin 1521.27197265625 +sparsity check 0.4999999701976776 +time 68.88 +65 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1566413.25 +err_fin 1113701.0 +sparsity check 0.49999999148505075 +time 138.87 +65 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1415837.0 +err_fin 999027.0 +sparsity check 0.49999999148505075 +time 139.19 +65 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 175696.875 +err_fin 164576.796875 +sparsity check 0.49999999148505075 +time 136.83 +66 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 13434.75 +err_fin 9914.8017578125 +sparsity check 0.4999999701976776 +time 76.00 +66 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3435.99365234375 +err_fin 2858.035400390625 +sparsity check 0.4999997615814209 +time 1.34 +66 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35824.453125 +err_fin 32533.126953125 +sparsity check 0.4999997615814209 +time 1.33 +66 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6657.2587890625 +err_fin 3521.1337890625 +sparsity check 0.4999999701976776 +time 68.82 +66 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1604164.5 +err_fin 1140492.0 +sparsity check 0.49999999148505075 +time 138.92 +66 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1458797.75 +err_fin 1029967.8125 +sparsity check 0.49999999148505075 +time 139.29 +66 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 183853.59375 +err_fin 172169.96875 +sparsity check 0.49999999148505075 +time 136.88 +67 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6373.236328125 +err_fin 4719.2001953125 +sparsity check 0.4999999701976776 +time 75.97 +67 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1663.343994140625 +err_fin 1330.06787109375 +sparsity check 0.4999997615814209 +time 1.36 +67 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14204.0703125 +err_fin 12517.884765625 +sparsity check 0.4999997615814209 +time 1.33 +67 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1791.4381103515625 +err_fin 752.0675048828125 +sparsity check 0.4999999701976776 +time 69.02 +67 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1622045.875 +err_fin 1155999.375 +sparsity check 0.49999999148505075 +time 138.94 +67 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1486747.0 +err_fin 1052275.875 +sparsity check 0.49999999148505075 +time 139.22 +67 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 186473.1875 +err_fin 174738.625 +sparsity check 0.49999999148505075 +time 136.88 +68 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 51552.6484375 +err_fin 38091.5234375 +sparsity check 0.4999999701976776 +time 75.94 +68 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19221.5234375 +err_fin 16823.7265625 +sparsity check 0.4999997615814209 +time 1.36 +68 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 71151.9140625 +err_fin 65181.734375 +sparsity check 0.4999997615814209 +time 1.34 +68 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7288.1435546875 +err_fin 3690.9140625 +sparsity check 0.4999999701976776 +time 68.89 +68 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1680152.0 +err_fin 1199584.5 +sparsity check 0.49999999148505075 +time 139.07 +68 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1547915.75 +err_fin 1097996.0 +sparsity check 0.49999999148505075 +time 139.32 +68 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 199138.234375 +err_fin 186279.90625 +sparsity check 0.49999999148505075 +time 136.83 +69 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 86524.171875 +err_fin 64054.6796875 +sparsity check 0.4999999701976776 +time 75.96 +69 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44817.75 +err_fin 39679.12109375 +sparsity check 0.4999997615814209 +time 1.52 +69 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 93110.2578125 +err_fin 84286.015625 +sparsity check 0.4999997615814209 +time 1.35 +69 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11422.0576171875 +err_fin 5443.55712890625 +sparsity check 0.4999999701976776 +time 68.90 +69 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1712167.25 +err_fin 1222631.0 +sparsity check 0.49999999148505075 +time 139.05 +69 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1595281.0 +err_fin 1132071.125 +sparsity check 0.49999999148505075 +time 139.24 +69 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 209077.484375 +err_fin 195700.515625 +sparsity check 0.49999999148505075 +time 136.84 +70 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 49750.33984375 +err_fin 36952.296875 +sparsity check 0.4999999701976776 +time 75.92 +70 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19357.1484375 +err_fin 16941.390625 +sparsity check 0.4999997615814209 +time 1.34 +70 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53286.68359375 +err_fin 48323.54296875 +sparsity check 0.4999997615814209 +time 1.33 +70 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8727.7646484375 +err_fin 4174.57666015625 +sparsity check 0.4999999701976776 +time 68.85 +70 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1752425.25 +err_fin 1251169.75 +sparsity check 0.49999999148505075 +time 138.85 +70 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1640509.25 +err_fin 1165366.25 +sparsity check 0.49999999148505075 +time 139.19 +70 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 223513.625 +err_fin 209054.53125 +sparsity check 0.49999999148505075 +time 136.75 +71 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 67431.9296875 +err_fin 50007.89453125 +sparsity check 0.4999999701976776 +time 75.93 +71 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33746.6015625 +err_fin 29709.49609375 +sparsity check 0.4999997615814209 +time 1.37 +71 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 57293.171875 +err_fin 52068.56640625 +sparsity check 0.4999997615814209 +time 1.34 +71 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11074.923828125 +err_fin 5226.69140625 +sparsity check 0.4999999701976776 +time 68.91 +71 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1807681.625 +err_fin 1288131.25 +sparsity check 0.49999999148505075 +time 138.86 +71 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1697690.5 +err_fin 1203887.5 +sparsity check 0.49999999148505075 +time 139.18 +71 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 237044.9375 +err_fin 221369.625 +sparsity check 0.49999999148505075 +time 136.83 +72 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 93618.8125 +err_fin 69071.015625 +sparsity check 0.4999999701976776 +time 75.95 +72 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 48187.52734375 +err_fin 42756.87890625 +sparsity check 0.4999997615814209 +time 1.36 +72 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 67075.671875 +err_fin 61100.4296875 +sparsity check 0.4999997615814209 +time 1.34 +72 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14306.2294921875 +err_fin 7519.7373046875 +sparsity check 0.4999999701976776 +time 69.02 +72 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1851395.25 +err_fin 1315942.0 +sparsity check 0.49999999148505075 +time 138.87 +72 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1749960.5 +err_fin 1238201.0 +sparsity check 0.49999999148505075 +time 139.24 +72 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 254901.0625 +err_fin 237732.640625 +sparsity check 0.49999999148505075 +time 136.71 +73 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 86980.6171875 +err_fin 63962.2265625 +sparsity check 0.4999999701976776 +time 76.01 +73 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 47429.7578125 +err_fin 42070.00390625 +sparsity check 0.4999997615814209 +time 1.39 +73 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 76576.703125 +err_fin 68824.5703125 +sparsity check 0.4999997615814209 +time 1.33 +73 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 9243.6279296875 +err_fin 4813.7822265625 +sparsity check 0.4999999701976776 +time 68.90 +73 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1896136.5 +err_fin 1343811.0 +sparsity check 0.49999999148505075 +time 138.88 +73 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1800215.875 +err_fin 1270323.25 +sparsity check 0.49999999148505075 +time 139.21 +73 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 280490.6875 +err_fin 260274.953125 +sparsity check 0.49999999148505075 +time 136.73 +74 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 79794.359375 +err_fin 58623.734375 +sparsity check 0.4999999701976776 +time 75.98 +74 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38902.015625 +err_fin 34011.27734375 +sparsity check 0.4999997615814209 +time 1.35 +74 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 57903.9140625 +err_fin 51672.7109375 +sparsity check 0.4999997615814209 +time 1.35 +74 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18365.837890625 +err_fin 8362.603515625 +sparsity check 0.4999999701976776 +time 69.05 +74 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1939999.5 +err_fin 1359626.5 +sparsity check 0.49999999148505075 +time 138.90 +74 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1847464.0 +err_fin 1289308.75 +sparsity check 0.49999999148505075 +time 139.21 +74 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 308198.96875 +err_fin 284229.5625 +sparsity check 0.49999999148505075 +time 136.83 +75 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 80345.03125 +err_fin 58214.7421875 +sparsity check 0.4999999701976776 +time 75.94 +75 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38695.88671875 +err_fin 33634.078125 +sparsity check 0.4999997615814209 +time 1.34 +75 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 65055.8046875 +err_fin 57275.7109375 +sparsity check 0.4999997615814209 +time 1.33 +75 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16365.791015625 +err_fin 7433.798828125 +sparsity check 0.4999999701976776 +time 68.93 +75 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1954902.0 +err_fin 1359252.625 +sparsity check 0.49999999148505075 +time 138.84 +75 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1860311.875 +err_fin 1288227.5 +sparsity check 0.49999999148505075 +time 139.20 +75 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 346329.8125 +err_fin 315204.3125 +sparsity check 0.49999999148505075 +time 136.80 +76 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 120414.59375 +err_fin 85542.703125 +sparsity check 0.4999999701976776 +time 75.95 +76 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 55974.1171875 +err_fin 47984.98046875 +sparsity check 0.4999997615814209 +time 1.38 +76 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 101912.640625 +err_fin 89068.21875 +sparsity check 0.4999997615814209 +time 1.34 +76 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 59227.875 +err_fin 30573.484375 +sparsity check 0.4999999701976776 +time 68.86 +76 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1939324.0 +err_fin 1319095.5 +sparsity check 0.49999999148505075 +time 138.87 +76 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1831292.625 +err_fin 1240294.0 +sparsity check 0.49999999148505075 +time 138.87 +76 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 416353.4375 +err_fin 369301.09375 +sparsity check 0.49999999148505075 +time 136.85 +77 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 79319.734375 +err_fin 55092.390625 +sparsity check 0.4999999701976776 +time 75.92 +77 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39984.7890625 +err_fin 33587.5859375 +sparsity check 0.4999997615814209 +time 1.59 +77 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 68770.40625 +err_fin 58922.96875 +sparsity check 0.4999997615814209 +time 1.33 +77 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23712.392578125 +err_fin 9312.755859375 +sparsity check 0.4999999701976776 +time 68.95 +77 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1806763.25 +err_fin 1191378.0 +sparsity check 0.49999999148505075 +time 138.91 +77 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1707377.125 +err_fin 1120868.5 +sparsity check 0.49999999148505075 +time 139.24 +77 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999850988388 0.4285714200564793 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218755977567 0.9709505944546686 1.0 +err_prefin 478436.125 +err_fin 409042.5 +sparsity check 0.4999999872275761 +time 136.94 +78 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 70711.5 +err_fin 46775.28125 +sparsity check 0.4999999701976776 +time 75.93 +78 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37181.0546875 +err_fin 29953.458984375 +sparsity check 0.4999997615814209 +time 1.37 +78 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 61483.86328125 +err_fin 52091.96875 +sparsity check 0.4999997615814209 +time 1.33 +78 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14208.9140625 +err_fin 6226.3701171875 +sparsity check 0.4999999701976776 +time 68.91 +78 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1509469.75 +err_fin 952065.8125 +sparsity check 0.49999999148505075 +time 138.85 +78 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1410296.25 +err_fin 886377.25 +sparsity check 0.49999999148505075 +time 139.19 +78 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 512088.1875 +err_fin 409127.8125 +sparsity check 0.49999999148505075 +time 136.74 +79 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 44558.09375 +err_fin 27206.3828125 +sparsity check 0.4999999701976776 +time 75.96 +79 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29653.58203125 +err_fin 23229.486328125 +sparsity check 0.4999997615814209 +time 1.38 +79 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26951.17578125 +err_fin 21621.861328125 +sparsity check 0.4999997615814209 +time 1.35 +79 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3706.9072265625 +err_fin 1084.0416259765625 +sparsity check 0.4999999701976776 +time 68.97 +79 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 924403.75 +err_fin 530493.5 +sparsity check 0.49999999148505075 +time 138.87 +79 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 843905.0 +err_fin 485972.6875 +sparsity check 0.49999999148505075 +time 139.17 +79 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 456901.625 +err_fin 296230.0 +sparsity check 0.49999999148505075 +time 136.78 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0083) +model.layers.0.self_attn.k_proj.weight tensor(0.0117) +model.layers.0.self_attn.v_proj.weight tensor(0.0441) +model.layers.0.self_attn.o_proj.weight tensor(3.3528e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0047) +51414.78756856918 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.443130 diff --git a/logs/llama2-70-0.5-fix-mask b/logs/llama2-70-0.5-fix-mask new file mode 100644 index 0000000..990a313 --- /dev/null +++ b/logs/llama2-70-0.5-fix-mask @@ -0,0 +1,4020 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +model.layers.0.self_attn.q_proj.weight torch.Size([8192, 8192]) (8192, 8192) 0.1 +model.layers.0.self_attn.k_proj.weight torch.Size([1024, 8192]) (1024, 8192) 0.2 +model.layers.0.mlp.gate_proj.weight torch.Size([28672, 8192]) (8192, 28672) 0.2 +Ready. +0 self_attn.q_proj +Pruning ... +0.49272334575653076 0.09272335469722748 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.416445870607917 0.9709505944546686 1.0 +err_prefin 34.08806610107422 +err_fin 7.878982067108154 +sparsity check 0.49272334575653076 +time 74.93 +0 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 55.34233474731445 +err_fin 13.895328521728516 +sparsity check 0.49999988079071045 +time 1.30 +0 self_attn.v_proj +Pruning ... +0.4999997615814209 0.1999988555908203 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0884365910305762 0.9709505944546686 1.0 +err_prefin 3.7334351539611816 +err_fin 1.9356110095977783 +sparsity check 0.4999997615814209 +time 1.30 +0 self_attn.o_proj +Pruning ... +0.4912364035844803 0.091236412525177 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4115339808813105 0.9709505944546686 1.0 +err_prefin 1.4021973609924316 +err_fin 0.05483083426952362 +sparsity check 0.4912364035844803 +time 67.76 +0 mlp.gate_proj +Pruning ... +0.49999311566352844 0.1999758630990982 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968090810583558 0.9709505944546686 1.0 +err_prefin 547.6597900390625 +err_fin 144.9074249267578 +sparsity check 0.49999311566352844 +time 137.57 +0 mlp.up_proj +Pruning ... +0.49999311566352844 0.1999758630990982 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968090810583558 0.9709505944546686 1.0 +err_prefin 552.895263671875 +err_fin 144.273193359375 +sparsity check 0.49999311566352844 +time 137.66 +0 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 9.972746849060059 +err_fin 4.442782402038574 +sparsity check 0.49999999574252535 +time 135.36 +1 self_attn.q_proj +Pruning ... +0.49007223546504974 0.09007224440574646 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4076615027389625 0.9709505944546686 1.0 +err_prefin 458.649658203125 +err_fin 89.62268829345703 +sparsity check 0.49007223546504974 +time 74.90 +1 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 545.6114501953125 +err_fin 140.63705444335938 +sparsity check 0.49999988079071045 +time 1.31 +1 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 25.592668533325195 +err_fin 13.595357894897461 +sparsity check 0.49999988079071045 +time 1.30 +1 self_attn.o_proj +Pruning ... +0.4849788695573807 0.08497887849807739 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3904346718109546 0.9709505944546686 1.0 +err_prefin 27.439220428466797 +err_fin 3.2743263244628906 +sparsity check 0.4849788695573807 +time 67.78 +1 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 4326.44580078125 +err_fin 962.2832641601562 +sparsity check 0.49999999574252535 +time 137.43 +1 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 4773.0712890625 +err_fin 1025.191162109375 +sparsity check 0.49999999574252535 +time 137.71 +1 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 185.38607788085938 +err_fin 147.3865509033203 +sparsity check 0.49999999574252535 +time 135.37 +2 self_attn.q_proj +Pruning ... +0.4988190531730652 0.0988190621137619 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4361914811403524 0.9709505944546686 1.0 +err_prefin 1570.6591796875 +err_fin 475.0574951171875 +sparsity check 0.4988190531730652 +time 74.82 +2 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 2552.8427734375 +err_fin 1128.4437255859375 +sparsity check 0.49999988079071045 +time 1.32 +2 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 133.1376190185547 +err_fin 89.30953979492188 +sparsity check 0.49999988079071045 +time 1.31 +2 self_attn.o_proj +Pruning ... +0.49749068915843964 0.09749069809913635 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.431941036871602 0.9709505944546686 1.0 +err_prefin 136.1435546875 +err_fin 31.312829971313477 +sparsity check 0.49749068915843964 +time 67.73 +2 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 17526.8671875 +err_fin 5676.291015625 +sparsity check 0.49999999574252535 +time 137.42 +2 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 18264.2578125 +err_fin 5755.36572265625 +sparsity check 0.49999999574252535 +time 137.61 +2 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 700.30712890625 +err_fin 593.2619018554688 +sparsity check 0.49999999574252535 +time 135.42 +3 self_attn.q_proj +Pruning ... +0.49996353685855865 0.09996354579925537 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398306150793116 0.9709505944546686 1.0 +err_prefin 14849.3662109375 +err_fin 6321.51171875 +sparsity check 0.49996353685855865 +time 74.82 +3 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 10442.033203125 +err_fin 6449.4130859375 +sparsity check 0.49999988079071045 +time 1.33 +3 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 1802.367431640625 +err_fin 1313.1429443359375 +sparsity check 0.49999988079071045 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.4996228814125061 0.09962289035320282 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4387496324175322 0.9709505944546686 1.0 +err_prefin 299.18115234375 +err_fin 110.90554809570312 +sparsity check 0.4996228814125061 +time 67.67 +3 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 39038.39453125 +err_fin 16544.859375 +sparsity check 0.49999999574252535 +time 137.43 +3 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 39166.4375 +err_fin 16271.880859375 +sparsity check 0.49999999574252535 +time 137.72 +3 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 962.98828125 +err_fin 852.9772338867188 +sparsity check 0.49999999574252535 +time 135.38 +4 self_attn.q_proj +Pruning ... +0.49993960559368134 0.09993961453437805 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4397547361857166 0.9709505944546686 1.0 +err_prefin 29343.0859375 +err_fin 14346.1953125 +sparsity check 0.49993960559368134 +time 74.83 +4 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 13446.9453125 +err_fin 8533.0625 +sparsity check 0.49999988079071045 +time 1.32 +4 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 3019.1005859375 +err_fin 2351.447998046875 +sparsity check 0.49999988079071045 +time 1.31 +4 self_attn.o_proj +Pruning ... +0.49829351902008057 0.09829352796077728 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4345133354141861 0.9709505944546686 1.0 +err_prefin 358.5370788574219 +err_fin 145.6417236328125 +sparsity check 0.49829351902008057 +time 67.70 +4 mlp.gate_proj +Pruning ... +0.4999933583395822 0.19997671246528625 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196809566462352 0.9709505944546686 1.0 +err_prefin 64526.4375 +err_fin 32204.076171875 +sparsity check 0.4999933583395822 +time 137.40 +4 mlp.up_proj +Pruning ... +0.499943665095738 0.19980278611183167 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196710130561604 0.9709505944546686 1.0 +err_prefin 63318.7421875 +err_fin 31148.4375 +sparsity check 0.499943665095738 +time 137.70 +4 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1476.088134765625 +err_fin 1328.525146484375 +sparsity check 0.49999999574252535 +time 135.37 +5 self_attn.q_proj +Pruning ... +0.49997615814208984 0.09997616708278656 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398706297009556 0.9709505944546686 1.0 +err_prefin 45802.046875 +err_fin 24824.36328125 +sparsity check 0.49997615814208984 +time 74.82 +5 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 19897.65625 +err_fin 14027.8740234375 +sparsity check 0.49999988079071045 +time 1.32 +5 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 4540.630859375 +err_fin 3649.612548828125 +sparsity check 0.49999988079071045 +time 1.31 +5 self_attn.o_proj +Pruning ... +0.4997566342353821 0.0997566431760788 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4391742849238163 0.9709505944546686 1.0 +err_prefin 603.4962768554688 +err_fin 235.88951110839844 +sparsity check 0.4997566342353821 +time 67.68 +5 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 81626.109375 +err_fin 44528.7421875 +sparsity check 0.49999999574252535 +time 137.42 +5 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 79629.28125 +err_fin 42902.3046875 +sparsity check 0.49999999574252535 +time 137.70 +5 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2072.517578125 +err_fin 1853.76708984375 +sparsity check 0.49999999574252535 +time 135.33 +6 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 52415.0 +err_fin 29954.88671875 +sparsity check 0.4999881684780121 +time 74.80 +6 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 21953.89453125 +err_fin 16100.9296875 +sparsity check 0.49999988079071045 +time 1.31 +6 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 5227.771484375 +err_fin 4343.818359375 +sparsity check 0.49999988079071045 +time 1.31 +6 self_attn.o_proj +Pruning ... +0.4975365102291107 0.09753651916980743 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4320881330342738 0.9709505944546686 1.0 +err_prefin 1031.421142578125 +err_fin 476.410888671875 +sparsity check 0.4975365102291107 +time 67.68 +6 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 108566.34375 +err_fin 61947.36328125 +sparsity check 0.49999999574252535 +time 137.41 +6 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 105072.3828125 +err_fin 59408.34765625 +sparsity check 0.49999999574252535 +time 137.70 +6 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3063.17919921875 +err_fin 2773.345947265625 +sparsity check 0.49999999574252535 +time 135.35 +7 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 86644.46875 +err_fin 52316.3046875 +sparsity check 0.4999881684780121 +time 74.80 +7 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 30040.24609375 +err_fin 23463.05859375 +sparsity check 0.49999988079071045 +time 1.33 +7 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 7565.7626953125 +err_fin 6453.3251953125 +sparsity check 0.49999988079071045 +time 1.30 +7 self_attn.o_proj +Pruning ... +0.4947527199983597 0.0947527289390564 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.42308851870487 0.9709505944546686 1.0 +err_prefin 1310.8568115234375 +err_fin 573.57177734375 +sparsity check 0.4947527199983597 +time 67.72 +7 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 134358.71875 +err_fin 80114.0625 +sparsity check 0.49999999574252535 +time 137.41 +7 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 129736.1796875 +err_fin 76680.21875 +sparsity check 0.49999999574252535 +time 137.71 +7 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 4323.55224609375 +err_fin 3912.0703125 +sparsity check 0.49999999574252535 +time 135.35 +8 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 75884.046875 +err_fin 47366.97265625 +sparsity check 0.4999881684780121 +time 74.81 +8 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 29886.185546875 +err_fin 22489.130859375 +sparsity check 0.49999988079071045 +time 1.33 +8 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 7067.2841796875 +err_fin 6060.041015625 +sparsity check 0.49999988079071045 +time 1.31 +8 self_attn.o_proj +Pruning ... +0.4958852231502533 0.09588523209095001 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4267652832437898 0.9709505944546686 1.0 +err_prefin 2190.83349609375 +err_fin 922.5817260742188 +sparsity check 0.4958852231502533 +time 67.72 +8 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 185066.015625 +err_fin 115194.484375 +sparsity check 0.49999999574252535 +time 137.41 +8 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 174999.25 +err_fin 108095.296875 +sparsity check 0.49999999574252535 +time 137.71 +8 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 8867.8662109375 +err_fin 8053.55859375 +sparsity check 0.49999999574252535 +time 135.32 +9 self_attn.q_proj +Pruning ... +0.4999762624502182 0.09997627139091492 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398709603896664 0.9709505944546686 1.0 +err_prefin 83692.3359375 +err_fin 56241.5546875 +sparsity check 0.4999762624502182 +time 74.82 +9 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 29946.142578125 +err_fin 24662.328125 +sparsity check 0.49999988079071045 +time 1.31 +9 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 10846.4208984375 +err_fin 9679.2373046875 +sparsity check 0.49999988079071045 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.49749529361724854 0.09749530255794525 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4319558197977513 0.9709505944546686 1.0 +err_prefin 1225.5751953125 +err_fin 485.3004455566406 +sparsity check 0.49749529361724854 +time 67.69 +9 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 236431.9375 +err_fin 154139.15625 +sparsity check 0.49999999574252535 +time 137.42 +9 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 222079.5625 +err_fin 143717.671875 +sparsity check 0.49999999574252535 +time 137.62 +9 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 7716.9306640625 +err_fin 7093.3818359375 +sparsity check 0.49999999574252535 +time 135.38 +10 self_attn.q_proj +Pruning ... +0.4999762624502182 0.09997627139091492 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398709603896664 0.9709505944546686 1.0 +err_prefin 50366.90625 +err_fin 34555.5234375 +sparsity check 0.4999762624502182 +time 74.79 +10 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20972.310546875 +err_fin 16446.255859375 +sparsity check 0.49999988079071045 +time 1.32 +10 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 10438.380859375 +err_fin 9284.658203125 +sparsity check 0.49999988079071045 +time 1.31 +10 self_attn.o_proj +Pruning ... +0.48955683410167694 0.08955684304237366 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4059394738501885 0.9709505944546686 1.0 +err_prefin 753.25927734375 +err_fin 324.17095947265625 +sparsity check 0.48955683410167694 +time 67.72 +10 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 286046.5 +err_fin 196094.625 +sparsity check 0.49999999574252535 +time 137.41 +10 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 267715.3125 +err_fin 182304.59375 +sparsity check 0.49999999574252535 +time 137.70 +10 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 8698.220703125 +err_fin 8076.32666015625 +sparsity check 0.49999999574252535 +time 135.36 +11 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 80485.90625 +err_fin 55793.3046875 +sparsity check 0.49998772144317627 +time 74.84 +11 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 33353.53125 +err_fin 26244.560546875 +sparsity check 0.49999988079071045 +time 1.31 +11 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 12479.7666015625 +err_fin 10959.146484375 +sparsity check 0.49999988079071045 +time 1.31 +11 self_attn.o_proj +Pruning ... +0.4854428768157959 0.08544288575649261 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3920235864263013 0.9709505944546686 1.0 +err_prefin 1232.4251708984375 +err_fin 523.7001953125 +sparsity check 0.4854428768157959 +time 67.75 +11 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 307951.625 +err_fin 215541.4375 +sparsity check 0.49999999574252535 +time 137.40 +11 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 289756.1875 +err_fin 201436.40625 +sparsity check 0.49999999574252535 +time 137.69 +11 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 9577.0673828125 +err_fin 8912.2724609375 +sparsity check 0.49999999574252535 +time 135.33 +12 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 68103.78125 +err_fin 47606.0390625 +sparsity check 0.49998772144317627 +time 74.81 +12 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 30560.9453125 +err_fin 23880.765625 +sparsity check 0.49999988079071045 +time 1.31 +12 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 9388.3828125 +err_fin 8111.275390625 +sparsity check 0.49999988079071045 +time 1.30 +12 self_attn.o_proj +Pruning ... +0.49501554667949677 0.09501555562019348 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4239437209889605 0.9709505944546686 1.0 +err_prefin 1512.551025390625 +err_fin 614.376220703125 +sparsity check 0.49501554667949677 +time 67.72 +12 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 304679.71875 +err_fin 214664.71875 +sparsity check 0.49999999574252535 +time 137.40 +12 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 289705.8125 +err_fin 203138.765625 +sparsity check 0.49999999574252535 +time 137.70 +12 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 10629.662109375 +err_fin 9844.0546875 +sparsity check 0.49999999574252535 +time 135.36 +13 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 89871.046875 +err_fin 63092.34765625 +sparsity check 0.49998772144317627 +time 74.97 +13 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 34686.421875 +err_fin 27442.31640625 +sparsity check 0.49999988079071045 +time 1.31 +13 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 12554.0126953125 +err_fin 11048.6611328125 +sparsity check 0.49999988079071045 +time 1.31 +13 self_attn.o_proj +Pruning ... +0.4901005029678345 0.09010051190853119 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4077558129857786 0.9709505944546686 1.0 +err_prefin 3843.938232421875 +err_fin 1864.849365234375 +sparsity check 0.4901005029678345 +time 67.88 +13 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 323137.3125 +err_fin 223854.53125 +sparsity check 0.49999999574252535 +time 137.80 +13 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 310574.3125 +err_fin 213911.125 +sparsity check 0.49999999574252535 +time 138.08 +13 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 13449.08203125 +err_fin 12456.4248046875 +sparsity check 0.49999999574252535 +time 135.74 +14 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 146839.21875 +err_fin 103904.828125 +sparsity check 0.49998772144317627 +time 74.98 +14 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 49520.5625 +err_fin 40365.359375 +sparsity check 0.49999988079071045 +time 1.32 +14 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 17806.49609375 +err_fin 15857.8916015625 +sparsity check 0.49999988079071045 +time 1.31 +14 self_attn.o_proj +Pruning ... +0.49405381083488464 0.09405381977558136 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4208087027364587 0.9709505944546686 1.0 +err_prefin 5030.2236328125 +err_fin 2252.26171875 +sparsity check 0.49405381083488464 +time 67.87 +14 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 372289.125 +err_fin 265223.0 +sparsity check 0.49999999574252535 +time 137.89 +14 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 357630.1875 +err_fin 253357.3125 +sparsity check 0.49999999574252535 +time 138.15 +14 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 14419.9716796875 +err_fin 13439.255859375 +sparsity check 0.49999999574252535 +time 135.75 +15 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 108777.1171875 +err_fin 78836.0546875 +sparsity check 0.49998772144317627 +time 75.11 +15 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 37974.76953125 +err_fin 31038.19140625 +sparsity check 0.49999988079071045 +time 1.32 +15 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 21119.76171875 +err_fin 19116.50390625 +sparsity check 0.49999988079071045 +time 1.32 +15 self_attn.o_proj +Pruning ... +0.4920341372489929 0.09203414618968964 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4141739218068623 0.9709505944546686 1.0 +err_prefin 3777.62158203125 +err_fin 1862.2109375 +sparsity check 0.4920341372489929 +time 68.01 +15 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 401217.625 +err_fin 288907.78125 +sparsity check 0.49999999574252535 +time 138.17 +15 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 387066.1875 +err_fin 277026.03125 +sparsity check 0.49999999574252535 +time 138.33 +15 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 16409.4375 +err_fin 15280.708984375 +sparsity check 0.49999999574252535 +time 136.07 +16 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 105659.078125 +err_fin 77506.4921875 +sparsity check 0.49998772144317627 +time 75.16 +16 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 41239.171875 +err_fin 33963.78515625 +sparsity check 0.49999988079071045 +time 1.31 +16 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 19672.27734375 +err_fin 17797.333984375 +sparsity check 0.49999988079071045 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.49631543457508087 0.09631544351577759 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4281563935764638 0.9709505944546686 1.0 +err_prefin 4274.4638671875 +err_fin 1988.13818359375 +sparsity check 0.49631543457508087 +time 68.03 +16 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 419959.71875 +err_fin 305134.8125 +sparsity check 0.49999999574252535 +time 138.14 +16 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 406808.25 +err_fin 294106.625 +sparsity check 0.49999999574252535 +time 138.42 +16 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 17283.220703125 +err_fin 16127.3623046875 +sparsity check 0.49999999574252535 +time 136.04 +17 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 134928.78125 +err_fin 97584.6640625 +sparsity check 0.49998772144317627 +time 75.12 +17 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 48884.97265625 +err_fin 39521.5078125 +sparsity check 0.49999988079071045 +time 1.34 +17 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 17418.22265625 +err_fin 15397.0830078125 +sparsity check 0.49999988079071045 +time 1.31 +17 self_attn.o_proj +Pruning ... +0.4991466701030731 0.09914667904376984 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.437235368404992 0.9709505944546686 1.0 +err_prefin 7362.8935546875 +err_fin 2987.857421875 +sparsity check 0.4991466701030731 +time 67.96 +17 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 416129.3125 +err_fin 297964.0625 +sparsity check 0.49999999574252535 +time 138.14 +17 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 404107.375 +err_fin 287668.75 +sparsity check 0.49999999574252535 +time 138.30 +17 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 21806.95703125 +err_fin 19919.7734375 +sparsity check 0.49999999574252535 +time 136.05 +18 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 127766.4375 +err_fin 93001.578125 +sparsity check 0.49998772144317627 +time 75.10 +18 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 45825.41796875 +err_fin 37889.796875 +sparsity check 0.49999988079071045 +time 1.32 +18 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26613.5078125 +err_fin 24281.3984375 +sparsity check 0.49999988079071045 +time 1.31 +18 self_attn.o_proj +Pruning ... +0.4893995672464371 0.08939957618713379 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.405413088249908 0.9709505944546686 1.0 +err_prefin 6830.02197265625 +err_fin 2966.24267578125 +sparsity check 0.4893995672464371 +time 68.04 +18 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 459914.1875 +err_fin 330994.0625 +sparsity check 0.49999999574252535 +time 138.13 +18 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 443396.4375 +err_fin 317242.5625 +sparsity check 0.49999999574252535 +time 138.39 +18 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 23930.73046875 +err_fin 21924.87890625 +sparsity check 0.49999999574252535 +time 136.03 +19 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 151214.796875 +err_fin 111182.21875 +sparsity check 0.49998772144317627 +time 75.11 +19 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 49385.66796875 +err_fin 42071.76171875 +sparsity check 0.49999988079071045 +time 1.31 +19 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 31174.37109375 +err_fin 28941.91796875 +sparsity check 0.49999988079071045 +time 1.30 +19 self_attn.o_proj +Pruning ... +0.4876689612865448 0.08766897022724152 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3995914973239965 0.9709505944546686 1.0 +err_prefin 4717.5537109375 +err_fin 2170.08056640625 +sparsity check 0.4876689612865448 +time 68.04 +19 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 499092.0625 +err_fin 361368.1875 +sparsity check 0.49999999574252535 +time 138.15 +19 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 481397.5 +err_fin 346922.6875 +sparsity check 0.49999999574252535 +time 138.34 +19 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 24736.42578125 +err_fin 22886.87890625 +sparsity check 0.49999999574252535 +time 136.05 +20 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 133521.78125 +err_fin 99405.609375 +sparsity check 0.49998772144317627 +time 75.12 +20 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 44213.140625 +err_fin 37228.4921875 +sparsity check 0.49999988079071045 +time 1.31 +20 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 25626.75390625 +err_fin 23838.16015625 +sparsity check 0.49999988079071045 +time 1.31 +20 self_attn.o_proj +Pruning ... +0.4920240491628647 0.0920240581035614 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4141406060638388 0.9709505944546686 1.0 +err_prefin 3953.66943359375 +err_fin 1568.969482421875 +sparsity check 0.4920240491628647 +time 68.04 +20 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 537636.25 +err_fin 392180.03125 +sparsity check 0.49999337111200604 +time 138.10 +20 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 518775.59375 +err_fin 376679.75 +sparsity check 0.49999337111200604 +time 138.40 +20 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 25165.505859375 +err_fin 23440.7421875 +sparsity check 0.49999999574252535 +time 136.03 +21 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 85106.8828125 +err_fin 63341.171875 +sparsity check 0.49998772144317627 +time 75.11 +21 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 32309.31640625 +err_fin 26476.8984375 +sparsity check 0.49999988079071045 +time 1.33 +21 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 22025.603515625 +err_fin 20266.328125 +sparsity check 0.49999988079071045 +time 1.31 +21 self_attn.o_proj +Pruning ... +0.48070839047431946 0.08070839941501617 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3756209634598644 0.9709505944546686 1.0 +err_prefin 4337.72265625 +err_fin 1998.4146728515625 +sparsity check 0.48070839047431946 +time 68.04 +21 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 561466.125 +err_fin 410761.0625 +sparsity check 0.49999337111200604 +time 138.13 +21 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 545155.25 +err_fin 396950.25 +sparsity check 0.49999999574252535 +time 138.40 +21 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 26194.49609375 +err_fin 24549.0703125 +sparsity check 0.49999999574252535 +time 136.06 +22 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 82049.421875 +err_fin 61513.953125 +sparsity check 0.49998772144317627 +time 75.11 +22 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 35360.390625 +err_fin 30303.45703125 +sparsity check 0.49999988079071045 +time 1.33 +22 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 24802.689453125 +err_fin 23051.39453125 +sparsity check 0.49999988079071045 +time 1.32 +22 self_attn.o_proj +Pruning ... +0.49824874103069305 0.09824874997138977 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.434370141940453 0.9709505944546686 1.0 +err_prefin 3671.6357421875 +err_fin 1467.75341796875 +sparsity check 0.49824874103069305 +time 67.99 +22 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 588264.875 +err_fin 431812.5 +sparsity check 0.49999999574252535 +time 138.15 +22 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 572260.25 +err_fin 418173.1875 +sparsity check 0.49999337111200604 +time 138.41 +22 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 26897.529296875 +err_fin 25304.95703125 +sparsity check 0.49999999574252535 +time 136.02 +23 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 117155.9375 +err_fin 87148.28125 +sparsity check 0.49998772144317627 +time 75.12 +23 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 48022.50390625 +err_fin 40585.8671875 +sparsity check 0.49999988079071045 +time 1.32 +23 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26600.05078125 +err_fin 24621.19921875 +sparsity check 0.49999988079071045 +time 1.31 +23 self_attn.o_proj +Pruning ... +0.4934813380241394 0.09348134696483612 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4189351582836252 0.9709505944546686 1.0 +err_prefin 4764.31787109375 +err_fin 2131.6533203125 +sparsity check 0.4934813380241394 +time 67.99 +23 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 603782.4375 +err_fin 444955.90625 +sparsity check 0.49999337111200604 +time 138.13 +23 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 588263.3125 +err_fin 431614.96875 +sparsity check 0.49999337111200604 +time 138.29 +23 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 27810.431640625 +err_fin 26119.39453125 +sparsity check 0.49999999574252535 +time 136.07 +24 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 103867.359375 +err_fin 76765.421875 +sparsity check 0.49998772144317627 +time 75.11 +24 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 41862.609375 +err_fin 34830.8515625 +sparsity check 0.49999988079071045 +time 1.32 +24 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20143.111328125 +err_fin 18188.3203125 +sparsity check 0.49999988079071045 +time 1.31 +24 self_attn.o_proj +Pruning ... +0.49733252823352814 0.09733253717422485 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4314330393829158 0.9709505944546686 1.0 +err_prefin 4534.4150390625 +err_fin 2072.19873046875 +sparsity check 0.49733252823352814 +time 68.04 +24 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 584705.3125 +err_fin 432536.96875 +sparsity check 0.49999999574252535 +time 138.12 +24 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 574857.375 +err_fin 423772.9375 +sparsity check 0.49999999574252535 +time 138.39 +24 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 27941.52734375 +err_fin 26251.923828125 +sparsity check 0.49999999574252535 +time 136.05 +25 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 120071.890625 +err_fin 88952.328125 +sparsity check 0.49998772144317627 +time 75.11 +25 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47751.328125 +err_fin 39539.93359375 +sparsity check 0.49999988079071045 +time 1.31 +25 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20739.15234375 +err_fin 18690.85546875 +sparsity check 0.49999988079071045 +time 1.30 +25 self_attn.o_proj +Pruning ... +0.49525563418865204 0.09525564312934875 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4247239201127866 0.9709505944546686 1.0 +err_prefin 10895.736328125 +err_fin 5156.1416015625 +sparsity check 0.49525563418865204 +time 67.99 +25 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 557226.375 +err_fin 400755.0625 +sparsity check 0.49999999574252535 +time 138.09 +25 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 553179.4375 +err_fin 396006.375 +sparsity check 0.49999999574252535 +time 138.31 +25 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 37830.1953125 +err_fin 34984.01953125 +sparsity check 0.49999999574252535 +time 136.07 +26 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 175385.265625 +err_fin 129243.6875 +sparsity check 0.49998772144317627 +time 75.11 +26 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 61568.4375 +err_fin 52269.25 +sparsity check 0.49999988079071045 +time 1.32 +26 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 31962.962890625 +err_fin 29029.134765625 +sparsity check 0.49999988079071045 +time 1.30 +26 self_attn.o_proj +Pruning ... +0.495570570230484 0.09557057917118073 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4257458876633167 0.9709505944546686 1.0 +err_prefin 12212.02734375 +err_fin 6031.96142578125 +sparsity check 0.495570570230484 +time 67.99 +26 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 632510.375 +err_fin 460037.625 +sparsity check 0.49999999574252535 +time 138.11 +26 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 624925.6875 +err_fin 452469.21875 +sparsity check 0.49999337111200604 +time 138.41 +26 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 37664.9296875 +err_fin 35469.4453125 +sparsity check 0.49999999574252535 +time 136.06 +27 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 79798.0078125 +err_fin 60004.65625 +sparsity check 0.49998772144317627 +time 75.12 +27 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 36520.203125 +err_fin 30774.515625 +sparsity check 0.49999988079071045 +time 1.31 +27 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26921.06640625 +err_fin 25074.23828125 +sparsity check 0.49999988079071045 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.4959203898906708 0.09592039883136749 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.426879112102447 0.9709505944546686 1.0 +err_prefin 5038.2265625 +err_fin 2082.519775390625 +sparsity check 0.4959203898906708 +time 68.01 +27 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 669772.1875 +err_fin 490635.5625 +sparsity check 0.49999337111200604 +time 138.11 +27 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 659654.625 +err_fin 480803.78125 +sparsity check 0.49999337111200604 +time 138.32 +27 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 39647.5390625 +err_fin 37466.8203125 +sparsity check 0.49999999574252535 +time 136.05 +28 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 145252.265625 +err_fin 109142.9140625 +sparsity check 0.49998772144317627 +time 75.11 +28 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 56241.96484375 +err_fin 48937.75 +sparsity check 0.49999988079071045 +time 1.33 +28 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 40909.66796875 +err_fin 38057.453125 +sparsity check 0.49999988079071045 +time 1.31 +28 self_attn.o_proj +Pruning ... +0.49546174705028534 0.09546175599098206 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4253929438441721 0.9709505944546686 1.0 +err_prefin 8529.5234375 +err_fin 4099.28125 +sparsity check 0.49546174705028534 +time 68.01 +28 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 706189.625 +err_fin 521497.25 +sparsity check 0.49999337111200604 +time 138.13 +28 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 695184.9375 +err_fin 510882.78125 +sparsity check 0.49999337111200604 +time 138.42 +28 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 41796.6484375 +err_fin 39637.125 +sparsity check 0.49999999574252535 +time 136.02 +29 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 143634.5 +err_fin 108505.09375 +sparsity check 0.49998772144317627 +time 74.95 +29 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 52403.703125 +err_fin 45089.0 +sparsity check 0.49999988079071045 +time 1.32 +29 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43642.34375 +err_fin 40687.1796875 +sparsity check 0.49999988079071045 +time 1.31 +29 self_attn.o_proj +Pruning ... +0.49339035153388977 0.09339036047458649 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4186368717223352 0.9709505944546686 1.0 +err_prefin 8886.2861328125 +err_fin 4260.4326171875 +sparsity check 0.49339035153388977 +time 67.86 +29 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 747150.25 +err_fin 554973.375 +sparsity check 0.49999337111200604 +time 137.87 +29 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 733530.8125 +err_fin 542177.375 +sparsity check 0.49999337111200604 +time 138.15 +29 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 43809.9921875 +err_fin 41724.9765625 +sparsity check 0.49999999574252535 +time 135.81 +30 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 76537.53125 +err_fin 58076.2109375 +sparsity check 0.49998772144317627 +time 74.81 +30 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 28479.72265625 +err_fin 24591.79296875 +sparsity check 0.49999988079071045 +time 1.32 +30 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 39850.8671875 +err_fin 37386.2578125 +sparsity check 0.49999988079071045 +time 1.31 +30 self_attn.o_proj +Pruning ... +0.498422771692276 0.09842278063297272 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4349264835682236 0.9709505944546686 1.0 +err_prefin 4446.6142578125 +err_fin 2080.4375 +sparsity check 0.498422771692276 +time 67.71 +30 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 791530.875 +err_fin 591407.3125 +sparsity check 0.49999337111200604 +time 137.76 +30 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 774201.875 +err_fin 575441.0625 +sparsity check 0.49999337111200604 +time 138.09 +30 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 45369.296875 +err_fin 43251.7734375 +sparsity check 0.49999999574252535 +time 135.73 +31 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 151977.609375 +err_fin 115422.6953125 +sparsity check 0.49998772144317627 +time 74.80 +31 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 53647.578125 +err_fin 46616.53125 +sparsity check 0.49999988079071045 +time 1.31 +31 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43322.09375 +err_fin 39935.734375 +sparsity check 0.49999988079071045 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.4965995103120804 0.0965995192527771 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4290732843687386 0.9709505944546686 1.0 +err_prefin 8435.0986328125 +err_fin 4382.162109375 +sparsity check 0.4965995103120804 +time 67.67 +31 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 824860.75 +err_fin 621916.0625 +sparsity check 0.49999337111200604 +time 137.68 +31 mlp.up_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 804884.875 +err_fin 604100.0625 +sparsity check 0.4999863122190748 +time 137.92 +31 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 46429.7890625 +err_fin 44336.328125 +sparsity check 0.49999999574252535 +time 135.72 +32 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 169728.5 +err_fin 129772.34375 +sparsity check 0.49998772144317627 +time 74.80 +32 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 56991.4921875 +err_fin 50299.40234375 +sparsity check 0.49999988079071045 +time 1.31 +32 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 39274.08203125 +err_fin 36705.93359375 +sparsity check 0.49999988079071045 +time 1.31 +32 self_attn.o_proj +Pruning ... +0.497482493519783 0.09748250246047974 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4319147233598155 0.9709505944546686 1.0 +err_prefin 6503.97265625 +err_fin 3032.211669921875 +sparsity check 0.497482493519783 +time 67.71 +32 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 844488.0 +err_fin 634982.75 +sparsity check 0.49999999574252535 +time 137.59 +32 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 825636.625 +err_fin 618152.875 +sparsity check 0.49999999574252535 +time 137.93 +32 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 49154.30859375 +err_fin 46833.76953125 +sparsity check 0.49999999574252535 +time 135.58 +33 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 241426.328125 +err_fin 182804.09375 +sparsity check 0.49998772144317627 +time 74.84 +33 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 71084.65625 +err_fin 61727.87109375 +sparsity check 0.49999988079071045 +time 1.31 +33 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43832.87890625 +err_fin 40348.328125 +sparsity check 0.49999988079071045 +time 1.30 +33 self_attn.o_proj +Pruning ... +0.4924851059913635 0.09248511493206024 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.415661446586966 0.9709505944546686 1.0 +err_prefin 12809.04296875 +err_fin 6111.228515625 +sparsity check 0.4924851059913635 +time 67.80 +33 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 855442.375 +err_fin 634631.125 +sparsity check 0.49999999574252535 +time 137.85 +33 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 839932.0 +err_fin 620200.3125 +sparsity check 0.49999337111200604 +time 138.06 +33 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 55860.78125 +err_fin 52946.34375 +sparsity check 0.49999999574252535 +time 135.80 +34 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 87907.765625 +err_fin 66005.96875 +sparsity check 0.49998772144317627 +time 74.79 +34 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 38007.6015625 +err_fin 31144.2890625 +sparsity check 0.49999988079071045 +time 1.32 +34 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26247.046875 +err_fin 23801.80859375 +sparsity check 0.49999988079071045 +time 1.31 +34 self_attn.o_proj +Pruning ... +0.4965437799692154 0.09654378890991211 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4288935128557605 0.9709505944546686 1.0 +err_prefin 8917.4375 +err_fin 4313.02099609375 +sparsity check 0.4965437799692154 +time 67.69 +34 mlp.gate_proj +Pruning ... +0.49999999148505075 0.19999994337558746 0.4428571505205972 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968228405567136 0.9709505944546686 1.0 +err_prefin 883252.625 +err_fin 658956.625 +sparsity check 0.49999999148505075 +time 137.71 +34 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 867057.875 +err_fin 643740.75 +sparsity check 0.49999999574252535 +time 138.06 +34 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 60795.9921875 +err_fin 57895.12109375 +sparsity check 0.49999999574252535 +time 135.74 +35 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 142988.84375 +err_fin 108514.25 +sparsity check 0.49998772144317627 +time 74.81 +35 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47445.8515625 +err_fin 39947.671875 +sparsity check 0.49999988079071045 +time 1.31 +35 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 35960.7734375 +err_fin 33210.8359375 +sparsity check 0.49999988079071045 +time 1.31 +35 self_attn.o_proj +Pruning ... +0.4856218099594116 0.08562181890010834 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3926352497431633 0.9709505944546686 1.0 +err_prefin 9172.3349609375 +err_fin 4169.6044921875 +sparsity check 0.4856218099594116 +time 67.70 +35 mlp.gate_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 936726.75 +err_fin 700609.5 +sparsity check 0.4999863122190748 +time 137.63 +35 mlp.up_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 917721.875 +err_fin 683108.25 +sparsity check 0.4999863122190748 +time 137.89 +35 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 65276.53125 +err_fin 62261.46875 +sparsity check 0.49999999574252535 +time 135.69 +36 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 136184.03125 +err_fin 103272.765625 +sparsity check 0.49998772144317627 +time 74.79 +36 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 45948.1953125 +err_fin 39011.28125 +sparsity check 0.49999988079071045 +time 1.31 +36 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 34898.28515625 +err_fin 32478.26171875 +sparsity check 0.49999988079071045 +time 1.31 +36 self_attn.o_proj +Pruning ... +0.4887405186891556 0.0887405276298523 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4032024212222955 0.9709505944546686 1.0 +err_prefin 6714.736328125 +err_fin 2944.044677734375 +sparsity check 0.4887405186891556 +time 67.77 +36 mlp.gate_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 958992.0625 +err_fin 716187.625 +sparsity check 0.4999863122190748 +time 137.61 +36 mlp.up_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 940117.875 +err_fin 698780.0 +sparsity check 0.4999863122190748 +time 137.96 +36 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 72726.25 +err_fin 69204.3203125 +sparsity check 0.49999999574252535 +time 135.66 +37 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 214821.875 +err_fin 161912.28125 +sparsity check 0.49998772144317627 +time 74.81 +37 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 62873.0390625 +err_fin 53841.98828125 +sparsity check 0.49999988079071045 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 45130.265625 +err_fin 41807.7578125 +sparsity check 0.49999988079071045 +time 1.31 +37 self_attn.o_proj +Pruning ... +0.48692476749420166 0.08692477643489838 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3970715469171076 0.9709505944546686 1.0 +err_prefin 13892.369140625 +err_fin 5770.4619140625 +sparsity check 0.48692476749420166 +time 67.72 +37 mlp.gate_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 1001696.25 +err_fin 742198.4375 +sparsity check 0.4999863122190748 +time 137.57 +37 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 981569.75 +err_fin 723922.375 +sparsity check 0.4999929368495941 +time 137.86 +37 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 79529.15625 +err_fin 75579.359375 +sparsity check 0.49999999574252535 +time 135.58 +38 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 241476.09375 +err_fin 179820.125 +sparsity check 0.49998772144317627 +time 74.80 +38 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 70421.0859375 +err_fin 58469.70703125 +sparsity check 0.49999988079071045 +time 1.31 +38 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47591.578125 +err_fin 43447.296875 +sparsity check 0.49999988079071045 +time 1.30 +38 self_attn.o_proj +Pruning ... +0.4874471426010132 0.0874471515417099 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.398841434910925 0.9709505944546686 1.0 +err_prefin 22931.5625 +err_fin 9835.1083984375 +sparsity check 0.4874471426010132 +time 67.73 +38 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1032398.5 +err_fin 762469.0 +sparsity check 0.4999929368495941 +time 137.69 +38 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1014006.0 +err_fin 745387.375 +sparsity check 0.4999929368495941 +time 138.04 +38 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 89015.859375 +err_fin 84436.0859375 +sparsity check 0.49999999574252535 +time 135.75 +39 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 296672.65625 +err_fin 219949.171875 +sparsity check 0.49998772144317627 +time 74.79 +39 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 87629.203125 +err_fin 76056.8828125 +sparsity check 0.49999988079071045 +time 1.31 +39 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 51629.421875 +err_fin 47132.328125 +sparsity check 0.49999988079071045 +time 1.30 +39 self_attn.o_proj +Pruning ... +0.4940083473920822 0.09400835633277893 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.420660116755753 0.9709505944546686 1.0 +err_prefin 29438.013671875 +err_fin 11882.00390625 +sparsity check 0.4940083473920822 +time 67.72 +39 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1044343.8125 +err_fin 760460.25 +sparsity check 0.49999999574252535 +time 137.67 +39 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1028718.3125 +err_fin 745674.125 +sparsity check 0.49999999574252535 +time 137.92 +39 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 108696.53125 +err_fin 101968.9375 +sparsity check 0.49999999574252535 +time 135.71 +40 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 276137.53125 +err_fin 200761.25 +sparsity check 0.4999999850988388 +time 74.80 +40 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 76791.5546875 +err_fin 66313.9140625 +sparsity check 0.49999988079071045 +time 1.31 +40 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 58183.0546875 +err_fin 52631.859375 +sparsity check 0.49999988079071045 +time 1.30 +40 self_attn.o_proj +Pruning ... +0.49458740651607513 0.09458741545677185 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4225500160701596 0.9709505944546686 1.0 +err_prefin 43815.84375 +err_fin 21857.01171875 +sparsity check 0.49458740651607513 +time 67.72 +40 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1088925.75 +err_fin 777796.5 +sparsity check 0.4999929368495941 +time 137.70 +40 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1070080.0 +err_fin 760568.125 +sparsity check 0.4999929368495941 +time 138.03 +40 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 122476.9921875 +err_fin 115007.2890625 +sparsity check 0.49999999574252535 +time 135.74 +41 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 226480.96875 +err_fin 162543.71875 +sparsity check 0.4999999850988388 +time 74.80 +41 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 66330.421875 +err_fin 56078.65625 +sparsity check 0.49999988079071045 +time 1.32 +41 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 50970.06640625 +err_fin 46188.6875 +sparsity check 0.49999988079071045 +time 1.31 +41 self_attn.o_proj +Pruning ... +0.49377967417240143 0.09377968311309814 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4199122238458421 0.9709505944546686 1.0 +err_prefin 40533.98046875 +err_fin 18104.935546875 +sparsity check 0.49377967417240143 +time 67.72 +41 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1142463.125 +err_fin 795477.6875 +sparsity check 0.4999929368495941 +time 137.67 +41 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1101468.5 +err_fin 761714.1875 +sparsity check 0.4999929368495941 +time 137.89 +41 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 142451.421875 +err_fin 132629.96875 +sparsity check 0.49999999574252535 +time 135.63 +42 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 260025.5625 +err_fin 183340.40625 +sparsity check 0.4999999850988388 +time 74.85 +42 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 67961.0546875 +err_fin 57528.015625 +sparsity check 0.49999988079071045 +time 1.31 +42 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 62688.9921875 +err_fin 57235.4375 +sparsity check 0.49999988079071045 +time 1.31 +42 self_attn.o_proj +Pruning ... +0.49190540611743927 0.09190541505813599 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4137486574036342 0.9709505944546686 1.0 +err_prefin 35988.8125 +err_fin 16496.677734375 +sparsity check 0.49190540611743927 +time 67.76 +42 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1237528.0 +err_fin 851591.625 +sparsity check 0.4999929368495941 +time 137.82 +42 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1164203.5 +err_fin 795813.75 +sparsity check 0.4999929368495941 +time 138.15 +42 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 155792.96875 +err_fin 144716.15625 +sparsity check 0.49999999574252535 +time 135.82 +43 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 180837.96875 +err_fin 126696.921875 +sparsity check 0.4999999850988388 +time 74.82 +43 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 60236.5 +err_fin 50642.33984375 +sparsity check 0.49999988079071045 +time 1.33 +43 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 49476.8046875 +err_fin 44093.203125 +sparsity check 0.49999988079071045 +time 1.31 +43 self_attn.o_proj +Pruning ... +0.49158766865730286 0.09158767759799957 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4126977820904947 0.9709505944546686 1.0 +err_prefin 25418.21484375 +err_fin 12953.314453125 +sparsity check 0.49158766865730286 +time 67.71 +43 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1288788.25 +err_fin 880817.5625 +sparsity check 0.49999999574252535 +time 137.61 +43 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1200073.75 +err_fin 814611.3125 +sparsity check 0.49999999574252535 +time 137.85 +43 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 172545.75 +err_fin 159962.21875 +sparsity check 0.49999999574252535 +time 135.65 +44 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 319052.75 +err_fin 221645.265625 +sparsity check 0.4999999850988388 +time 74.80 +44 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 68662.0703125 +err_fin 59145.0703125 +sparsity check 0.49999988079071045 +time 1.33 +44 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 77577.5859375 +err_fin 71179.9375 +sparsity check 0.49999988079071045 +time 1.30 +44 self_attn.o_proj +Pruning ... +0.49883486330509186 0.09883487224578857 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.43624189692368 0.9709505944546686 1.0 +err_prefin 73088.859375 +err_fin 36183.0546875 +sparsity check 0.49883486330509186 +time 67.66 +44 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1415447.75 +err_fin 949839.5 +sparsity check 0.4999929368495941 +time 137.61 +44 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1270309.625 +err_fin 844733.5 +sparsity check 0.49999999574252535 +time 137.93 +44 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 201792.09375 +err_fin 184093.28125 +sparsity check 0.49999999574252535 +time 135.62 +45 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 206996.09375 +err_fin 141192.875 +sparsity check 0.49998772144317627 +time 74.79 +45 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 63186.203125 +err_fin 53493.8125 +sparsity check 0.49999988079071045 +time 1.31 +45 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 71699.359375 +err_fin 65873.9609375 +sparsity check 0.49999988079071045 +time 1.31 +45 self_attn.o_proj +Pruning ... +0.49905718863010406 0.09905719757080078 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.436950425504849 0.9709505944546686 1.0 +err_prefin 20377.58984375 +err_fin 10190.095703125 +sparsity check 0.49905718863010406 +time 67.66 +45 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1533385.25 +err_fin 1029034.375 +sparsity check 0.4999929368495941 +time 137.57 +45 mlp.up_proj +Pruning ... +0.4999860099383763 0.19995099306106567 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196794867264677 0.9709505944546686 1.0 +err_prefin 1358533.25 +err_fin 903150.375 +sparsity check 0.4999860099383763 +time 137.89 +45 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 208862.671875 +err_fin 191016.1875 +sparsity check 0.49999999574252535 +time 135.58 +46 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 174311.3125 +err_fin 118973.0859375 +sparsity check 0.49998772144317627 +time 74.82 +46 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 52012.3125 +err_fin 44647.17578125 +sparsity check 0.4999997615814209 +time 1.32 +46 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 64555.6953125 +err_fin 59597.5 +sparsity check 0.49999988079071045 +time 1.30 +46 self_attn.o_proj +Pruning ... +0.4972884804010391 0.09728848934173584 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4312914889320105 0.9709505944546686 1.0 +err_prefin 26271.66796875 +err_fin 13423.681640625 +sparsity check 0.4972884804010391 +time 67.67 +46 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1643266.375 +err_fin 1109114.0 +sparsity check 0.49999999574252535 +time 137.63 +46 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1432952.0 +err_fin 958490.9375 +sparsity check 0.49999999574252535 +time 137.96 +46 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 210038.59375 +err_fin 193328.28125 +sparsity check 0.49999999574252535 +time 135.64 +47 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 235650.875 +err_fin 161934.359375 +sparsity check 0.4999999850988388 +time 74.82 +47 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 72157.484375 +err_fin 61676.18359375 +sparsity check 0.49999988079071045 +time 1.32 +47 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 60162.171875 +err_fin 54988.4921875 +sparsity check 0.49999988079071045 +time 1.31 +47 self_attn.o_proj +Pruning ... +0.4947979599237442 0.09479796886444092 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4232358059733905 0.9709505944546686 1.0 +err_prefin 43868.10546875 +err_fin 17617.6796875 +sparsity check 0.4947979599237442 +time 67.67 +47 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1707190.0 +err_fin 1138405.25 +sparsity check 0.49999999574252535 +time 137.71 +47 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1468303.5 +err_fin 969839.125 +sparsity check 0.49999999574252535 +time 137.92 +47 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 235664.03125 +err_fin 214540.015625 +sparsity check 0.49999999574252535 +time 135.71 +48 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 120378.390625 +err_fin 81721.375 +sparsity check 0.49998772144317627 +time 74.82 +48 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 35274.08984375 +err_fin 29568.283203125 +sparsity check 0.49999988079071045 +time 1.32 +48 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 61085.9140625 +err_fin 56080.40625 +sparsity check 0.49999988079071045 +time 1.30 +48 self_attn.o_proj +Pruning ... +0.49621304869651794 0.09621305763721466 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4278256023079603 0.9709505944546686 1.0 +err_prefin 26171.66796875 +err_fin 13117.326171875 +sparsity check 0.49621304869651794 +time 67.69 +48 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1784709.5 +err_fin 1185612.875 +sparsity check 0.49999999574252535 +time 137.64 +48 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1528418.0 +err_fin 1005144.5625 +sparsity check 0.49999999574252535 +time 137.92 +48 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 236499.40625 +err_fin 216323.59375 +sparsity check 0.49999999574252535 +time 135.62 +49 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 71287.03125 +err_fin 48274.9453125 +sparsity check 0.49998772144317627 +time 74.88 +49 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 28936.22265625 +err_fin 24354.15234375 +sparsity check 0.49999988079071045 +time 1.32 +49 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 48516.5390625 +err_fin 43833.39453125 +sparsity check 0.49999988079071045 +time 1.30 +49 self_attn.o_proj +Pruning ... +0.49976903200149536 0.09976904094219208 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.439213632094054 0.9709505944546686 1.0 +err_prefin 12020.09375 +err_fin 5332.6396484375 +sparsity check 0.49976903200149536 +time 67.77 +49 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1853601.5 +err_fin 1232022.875 +sparsity check 0.49999999574252535 +time 137.96 +49 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1584004.75 +err_fin 1043144.75 +sparsity check 0.49999999574252535 +time 138.14 +49 mlp.down_proj +Pruning ... +0.49999999148505075 0.19999994337558746 0.4428571505205972 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968228405567136 0.9709505944546686 1.0 +err_prefin 237605.40625 +err_fin 218308.71875 +sparsity check 0.49999999148505075 +time 135.89 +50 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 107512.8515625 +err_fin 72915.1328125 +sparsity check 0.49998772144317627 +time 74.90 +50 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 31522.11328125 +err_fin 26362.2578125 +sparsity check 0.49999988079071045 +time 1.31 +50 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 62462.328125 +err_fin 56655.20703125 +sparsity check 0.49999988079071045 +time 1.30 +50 self_attn.o_proj +Pruning ... +0.49843937158584595 0.09843938052654266 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4349795244104402 0.9709505944546686 1.0 +err_prefin 19946.50390625 +err_fin 9844.80859375 +sparsity check 0.49843937158584595 +time 67.79 +50 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1921409.5 +err_fin 1280349.875 +sparsity check 0.49999999574252535 +time 137.78 +50 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1636713.5 +err_fin 1081025.75 +sparsity check 0.49999999574252535 +time 138.09 +50 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 239087.375 +err_fin 220257.65625 +sparsity check 0.49999999574252535 +time 135.76 +51 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 137617.84375 +err_fin 93474.2734375 +sparsity check 0.49998772144317627 +time 74.82 +51 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43869.53125 +err_fin 37722.6015625 +sparsity check 0.49999988079071045 +time 1.31 +51 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 66521.75 +err_fin 60619.421875 +sparsity check 0.49999988079071045 +time 1.31 +51 self_attn.o_proj +Pruning ... +0.4904519319534302 0.09045194089412689 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.408927129814348 0.9709505944546686 1.0 +err_prefin 19066.23828125 +err_fin 8592.8701171875 +sparsity check 0.4904519319534302 +time 67.72 +51 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1972205.0 +err_fin 1320068.625 +sparsity check 0.49999999574252535 +time 137.41 +51 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1685180.375 +err_fin 1118161.75 +sparsity check 0.49999999574252535 +time 137.59 +51 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 240411.546875 +err_fin 221970.890625 +sparsity check 0.49999999574252535 +time 135.39 +52 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 223224.5625 +err_fin 153257.15625 +sparsity check 0.49998772144317627 +time 74.82 +52 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 50713.9921875 +err_fin 44092.09375 +sparsity check 0.49999988079071045 +time 1.32 +52 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 78441.4609375 +err_fin 71977.953125 +sparsity check 0.49999988079071045 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.4918024092912674 0.09180241823196411 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4134082002290973 0.9709505944546686 1.0 +err_prefin 35848.7890625 +err_fin 16662.212890625 +sparsity check 0.4918024092912674 +time 67.68 +52 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2020939.25 +err_fin 1360066.875 +sparsity check 0.49999999574252535 +time 137.40 +52 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1724693.5 +err_fin 1150505.875 +sparsity check 0.49999999574252535 +time 137.69 +52 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 245836.328125 +err_fin 227286.4375 +sparsity check 0.49999999574252535 +time 135.37 +53 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 75688.71875 +err_fin 52040.27734375 +sparsity check 0.49998772144317627 +time 74.83 +53 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 23458.7578125 +err_fin 19803.484375 +sparsity check 0.49999988079071045 +time 1.32 +53 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 61326.74609375 +err_fin 56115.328125 +sparsity check 0.49999988079071045 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.4999876320362091 0.09998764097690582 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399070044133822 0.9709505944546686 1.0 +err_prefin 16158.013671875 +err_fin 7425.544921875 +sparsity check 0.4999876320362091 +time 67.69 +53 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2072409.25 +err_fin 1394473.875 +sparsity check 0.49999999574252535 +time 137.40 +53 mlp.up_proj +Pruning ... +0.49999306883130756 0.19997569918632507 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968089873836865 0.9709505944546686 1.0 +err_prefin 1771752.25 +err_fin 1181658.5 +sparsity check 0.49999306883130756 +time 137.69 +53 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 248550.5 +err_fin 229996.0 +sparsity check 0.49999999574252535 +time 135.37 +54 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 89043.7109375 +err_fin 60977.234375 +sparsity check 0.49998772144317627 +time 74.83 +54 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26400.94140625 +err_fin 22019.529296875 +sparsity check 0.49999988079071045 +time 1.31 +54 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 63252.6484375 +err_fin 58003.8515625 +sparsity check 0.49999988079071045 +time 1.30 +54 self_attn.o_proj +Pruning ... +0.4955211728811264 0.09552118182182312 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4255857028398577 0.9709505944546686 1.0 +err_prefin 17532.365234375 +err_fin 8175.330078125 +sparsity check 0.4955211728811264 +time 67.70 +54 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2120160.5 +err_fin 1431364.625 +sparsity check 0.49999999574252535 +time 137.45 +54 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1819114.125 +err_fin 1217025.5 +sparsity check 0.49999999574252535 +time 137.80 +54 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 248944.78125 +err_fin 230963.3125 +sparsity check 0.49999999574252535 +time 135.53 +55 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 141524.65625 +err_fin 97785.8359375 +sparsity check 0.49998772144317627 +time 74.85 +55 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43051.9921875 +err_fin 36888.5859375 +sparsity check 0.49999988079071045 +time 1.31 +55 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 81794.875 +err_fin 74996.4375 +sparsity check 0.49999988079071045 +time 1.32 +55 self_attn.o_proj +Pruning ... +0.49940069019794464 0.09940069913864136 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4380435602332058 0.9709505944546686 1.0 +err_prefin 31721.14453125 +err_fin 14184.46875 +sparsity check 0.49940069019794464 +time 67.74 +55 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2157582.5 +err_fin 1460996.75 +sparsity check 0.49999999574252535 +time 137.82 +55 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1866490.75 +err_fin 1252972.0 +sparsity check 0.49999999574252535 +time 138.03 +55 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 254706.140625 +err_fin 237028.265625 +sparsity check 0.49999999574252535 +time 135.82 +56 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 103589.296875 +err_fin 71867.15625 +sparsity check 0.49998772144317627 +time 74.84 +56 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 34095.0390625 +err_fin 28903.630859375 +sparsity check 0.49999988079071045 +time 1.32 +56 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 57818.0625 +err_fin 53014.0859375 +sparsity check 0.49999988079071045 +time 1.31 +56 self_attn.o_proj +Pruning ... +0.49720531702041626 0.09720532596111298 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4310241513281567 0.9709505944546686 1.0 +err_prefin 17526.7421875 +err_fin 7605.9521484375 +sparsity check 0.49720531702041626 +time 67.73 +56 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2191015.75 +err_fin 1483332.875 +sparsity check 0.49999999574252535 +time 137.77 +56 mlp.up_proj +Pruning ... +0.4999930475439344 0.1999756246805191 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968089448042682 0.9709505944546686 1.0 +err_prefin 1899413.0 +err_fin 1275646.75 +sparsity check 0.4999930475439344 +time 138.10 +56 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 258692.40625 +err_fin 240815.328125 +sparsity check 0.49999999574252535 +time 135.75 +57 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 99702.078125 +err_fin 68511.875 +sparsity check 0.49998772144317627 +time 74.80 +57 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 32397.16015625 +err_fin 27770.46484375 +sparsity check 0.49999988079071045 +time 1.31 +57 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 74720.7109375 +err_fin 68767.1875 +sparsity check 0.49999988079071045 +time 1.31 +57 self_attn.o_proj +Pruning ... +0.49997578561306 0.09997579455375671 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398694486684214 0.9709505944546686 1.0 +err_prefin 19992.44921875 +err_fin 9417.009765625 +sparsity check 0.49997578561306 +time 67.67 +57 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2257321.0 +err_fin 1533310.25 +sparsity check 0.49999999574252535 +time 137.69 +57 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1962325.25 +err_fin 1322062.25 +sparsity check 0.49999999574252535 +time 137.91 +57 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 263333.6875 +err_fin 245607.65625 +sparsity check 0.49999999574252535 +time 135.67 +58 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 57746.41796875 +err_fin 39897.58203125 +sparsity check 0.4999881684780121 +time 74.85 +58 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20086.544921875 +err_fin 16584.66796875 +sparsity check 0.49999988079071045 +time 1.30 +58 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 53898.4453125 +err_fin 48980.71484375 +sparsity check 0.49999988079071045 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.49996423721313477 0.09996424615383148 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398328355561842 0.9709505944546686 1.0 +err_prefin 14397.3349609375 +err_fin 6855.41748046875 +sparsity check 0.49996423721313477 +time 67.77 +58 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2304389.5 +err_fin 1570758.625 +sparsity check 0.49999999574252535 +time 137.87 +58 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2006441.25 +err_fin 1357478.75 +sparsity check 0.49999999574252535 +time 138.14 +58 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 265134.375 +err_fin 247697.96875 +sparsity check 0.49999999574252535 +time 135.77 +59 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 72747.109375 +err_fin 50376.4765625 +sparsity check 0.49998772144317627 +time 74.85 +59 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 23255.9375 +err_fin 19589.732421875 +sparsity check 0.49999988079071045 +time 1.32 +59 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 60189.921875 +err_fin 54635.609375 +sparsity check 0.49999988079071045 +time 1.30 +59 self_attn.o_proj +Pruning ... +0.49997544288635254 0.09997545182704926 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398683621165247 0.9709505944546686 1.0 +err_prefin 14854.1142578125 +err_fin 6778.9365234375 +sparsity check 0.49997544288635254 +time 67.75 +59 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2355445.75 +err_fin 1611687.125 +sparsity check 0.49999999574252535 +time 137.81 +59 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2056139.5 +err_fin 1396654.75 +sparsity check 0.49999999574252535 +time 137.99 +59 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 265730.625 +err_fin 248909.6875 +sparsity check 0.49999999574252535 +time 135.77 +60 self_attn.q_proj +Pruning ... +0.499952495098114 0.09995250403881073 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4397956060958843 0.9709505944546686 1.0 +err_prefin 14072.958984375 +err_fin 9710.4326171875 +sparsity check 0.499952495098114 +time 74.84 +60 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 5356.01171875 +err_fin 4371.15625 +sparsity check 0.49999988079071045 +time 1.31 +60 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 32008.37109375 +err_fin 29080.765625 +sparsity check 0.49999988079071045 +time 1.30 +60 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 8923.4423828125 +err_fin 4361.81982421875 +sparsity check 0.4999999850988388 +time 67.71 +60 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2387430.5 +err_fin 1642731.5 +sparsity check 0.49999999574252535 +time 137.71 +60 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2096408.625 +err_fin 1431900.75 +sparsity check 0.49999999574252535 +time 138.01 +60 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 264923.25 +err_fin 248725.640625 +sparsity check 0.49999999574252535 +time 135.66 +61 self_attn.q_proj +Pruning ... +0.4999639242887497 0.09996393322944641 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.439831843429192 0.9709505944546686 1.0 +err_prefin 47633.66015625 +err_fin 33410.7421875 +sparsity check 0.4999639242887497 +time 74.87 +61 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 16931.99609375 +err_fin 14150.962890625 +sparsity check 0.49999988079071045 +time 1.31 +61 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 54502.34765625 +err_fin 50002.5234375 +sparsity check 0.49999988079071045 +time 1.31 +61 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 13544.08203125 +err_fin 6578.74658203125 +sparsity check 0.4999999850988388 +time 67.70 +61 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2429836.0 +err_fin 1676024.0 +sparsity check 0.49999999574252535 +time 137.79 +61 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2142882.75 +err_fin 1467843.25 +sparsity check 0.49999999574252535 +time 138.09 +61 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 266466.875 +err_fin 250583.453125 +sparsity check 0.49999999574252535 +time 135.77 +62 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 52108.1484375 +err_fin 36507.953125 +sparsity check 0.4999881684780121 +time 74.97 +62 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 18821.046875 +err_fin 15770.6708984375 +sparsity check 0.49999988079071045 +time 1.32 +62 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 59863.88671875 +err_fin 54585.32421875 +sparsity check 0.49999988079071045 +time 1.33 +62 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 12698.0546875 +err_fin 5501.97021484375 +sparsity check 0.4999999850988388 +time 67.85 +62 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2478572.0 +err_fin 1717879.0 +sparsity check 0.49999999574252535 +time 137.85 +62 mlp.up_proj +Pruning ... +0.4999930475439344 0.1999756246805191 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968089448042682 0.9709505944546686 1.0 +err_prefin 2197324.5 +err_fin 1512603.5 +sparsity check 0.4999930475439344 +time 138.11 +62 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 269533.28125 +err_fin 253766.6875 +sparsity check 0.49999999574252535 +time 135.78 +63 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 45090.234375 +err_fin 31936.611328125 +sparsity check 0.49998772144317627 +time 74.96 +63 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 14729.412109375 +err_fin 12177.5546875 +sparsity check 0.49999988079071045 +time 1.34 +63 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47941.43359375 +err_fin 43172.2734375 +sparsity check 0.49999988079071045 +time 1.31 +63 self_attn.o_proj +Pruning ... +0.49934376776218414 0.09934377670288086 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.437862545964863 0.9709505944546686 1.0 +err_prefin 8912.994140625 +err_fin 4147.9580078125 +sparsity check 0.49934376776218414 +time 67.83 +63 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2530823.0 +err_fin 1761127.75 +sparsity check 0.49999999574252535 +time 137.82 +63 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2248559.0 +err_fin 1554851.0 +sparsity check 0.49999999574252535 +time 138.01 +63 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 273414.4375 +err_fin 257696.5 +sparsity check 0.49999999574252535 +time 135.76 +64 self_attn.q_proj +Pruning ... +0.49997590482234955 0.09997591376304626 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398698265990744 0.9709505944546686 1.0 +err_prefin 83136.7265625 +err_fin 58826.8125 +sparsity check 0.49997590482234955 +time 75.01 +64 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 27826.56640625 +err_fin 23221.775390625 +sparsity check 0.49999988079071045 +time 1.32 +64 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 66823.0 +err_fin 61412.35546875 +sparsity check 0.49999988079071045 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 19559.7578125 +err_fin 8276.00390625 +sparsity check 0.4999999850988388 +time 67.84 +64 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2579009.0 +err_fin 1800084.625 +sparsity check 0.49999999574252535 +time 137.79 +64 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2299356.5 +err_fin 1594780.0 +sparsity check 0.49999999574252535 +time 138.08 +64 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 278264.25 +err_fin 262473.65625 +sparsity check 0.49999999574252535 +time 135.67 +65 self_attn.q_proj +Pruning ... +0.49996471405029297 0.09996473789215088 0.3999999761581421 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398343858930924 0.9709505944546686 1.0 +err_prefin 24457.505859375 +err_fin 17313.802734375 +sparsity check 0.49996471405029297 +time 74.98 +65 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 8344.43359375 +err_fin 6731.2822265625 +sparsity check 0.49999988079071045 +time 1.33 +65 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 37324.55078125 +err_fin 33578.91796875 +sparsity check 0.49999988079071045 +time 1.33 +65 self_attn.o_proj +Pruning ... +0.49989132583141327 0.09989133477210999 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4396016275189618 0.9709505944546686 1.0 +err_prefin 8445.197265625 +err_fin 3808.46875 +sparsity check 0.49989132583141327 +time 67.83 +65 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2621310.5 +err_fin 1836813.125 +sparsity check 0.49999999574252535 +time 137.83 +65 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2349046.5 +err_fin 1636942.625 +sparsity check 0.49999999574252535 +time 138.02 +65 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 283536.125 +err_fin 267619.4375 +sparsity check 0.49999999574252535 +time 135.77 +66 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 43862.71484375 +err_fin 30926.05859375 +sparsity check 0.4999881684780121 +time 74.98 +66 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 15598.61328125 +err_fin 12776.958984375 +sparsity check 0.49999988079071045 +time 1.32 +66 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 48583.1796875 +err_fin 44552.58984375 +sparsity check 0.49999988079071045 +time 1.31 +66 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 15405.40234375 +err_fin 7926.279296875 +sparsity check 0.4999999850988388 +time 67.85 +66 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2680995.25 +err_fin 1879446.625 +sparsity check 0.49999999574252535 +time 137.82 +66 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2419187.25 +err_fin 1686362.125 +sparsity check 0.49999999574252535 +time 138.10 +66 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 296520.46875 +err_fin 279841.875 +sparsity check 0.49999999574252535 +time 135.74 +67 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 19128.6796875 +err_fin 13560.47265625 +sparsity check 0.4999881684780121 +time 74.99 +67 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 6277.0283203125 +err_fin 4847.79541015625 +sparsity check 0.49999988079071045 +time 1.33 +67 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20735.083984375 +err_fin 18118.58984375 +sparsity check 0.49999988079071045 +time 1.33 +67 self_attn.o_proj +Pruning ... +0.4955729842185974 0.09557299315929413 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4257537146560133 0.9709505944546686 1.0 +err_prefin 4755.5068359375 +err_fin 1735.486328125 +sparsity check 0.4955729842185974 +time 67.80 +67 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2708568.5 +err_fin 1903017.75 +sparsity check 0.49999999574252535 +time 137.85 +67 mlp.up_proj +Pruning ... +0.49999999148505075 0.19999994337558746 0.4428571505205972 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968228405567136 0.9709505944546686 1.0 +err_prefin 2461934.5 +err_fin 1720931.75 +sparsity check 0.49999999148505075 +time 138.01 +67 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 300484.4375 +err_fin 283625.40625 +sparsity check 0.49999999574252535 +time 135.76 +68 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 151678.546875 +err_fin 107747.7578125 +sparsity check 0.49998772144317627 +time 75.01 +68 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47341.45703125 +err_fin 40328.2421875 +sparsity check 0.49999988079071045 +time 1.33 +68 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 97499.9140625 +err_fin 90175.765625 +sparsity check 0.49999988079071045 +time 1.33 +68 self_attn.o_proj +Pruning ... +0.4997800439596176 0.09978005290031433 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4392485790125884 0.9709505944546686 1.0 +err_prefin 16619.52734375 +err_fin 8152.15625 +sparsity check 0.4997800439596176 +time 67.83 +68 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2801124.75 +err_fin 1972732.0 +sparsity check 0.49999999574252535 +time 137.91 +68 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2563574.25 +err_fin 1795951.25 +sparsity check 0.49999999574252535 +time 138.38 +68 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 321001.375 +err_fin 302484.65625 +sparsity check 0.49999999574252535 +time 135.86 +69 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 249855.0625 +err_fin 178340.03125 +sparsity check 0.49998772144317627 +time 75.04 +69 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 84946.28125 +err_fin 73483.71875 +sparsity check 0.49999988079071045 +time 1.32 +69 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 130384.75 +err_fin 119503.3671875 +sparsity check 0.49999988079071045 +time 1.33 +69 self_attn.o_proj +Pruning ... +0.4963749498128891 0.09637495875358582 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4283485972673708 0.9709505944546686 1.0 +err_prefin 29519.19140625 +err_fin 13056.2744140625 +sparsity check 0.4963749498128891 +time 67.92 +69 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2851497.0 +err_fin 2009025.75 +sparsity check 0.49999999574252535 +time 137.93 +69 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2639775.0 +err_fin 1850543.0 +sparsity check 0.49999999574252535 +time 138.11 +69 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 337184.28125 +err_fin 318016.5 +sparsity check 0.49999999574252535 +time 135.74 +70 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 145374.59375 +err_fin 103462.859375 +sparsity check 0.49998772144317627 +time 75.11 +70 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 46898.140625 +err_fin 39353.4453125 +sparsity check 0.49999988079071045 +time 1.34 +70 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 76992.0546875 +err_fin 70274.2109375 +sparsity check 0.49999988079071045 +time 1.33 +70 self_attn.o_proj +Pruning ... +0.49335020780563354 0.09335021674633026 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4185052212485088 0.9709505944546686 1.0 +err_prefin 23318.654296875 +err_fin 10277.1904296875 +sparsity check 0.49335020780563354 +time 67.97 +70 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2915899.0 +err_fin 2054024.875 +sparsity check 0.49999999574252535 +time 137.91 +70 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2717377.5 +err_fin 1904456.0 +sparsity check 0.49999999574252535 +time 138.21 +70 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 360152.78125 +err_fin 339412.3125 +sparsity check 0.49999999574252535 +time 135.73 +71 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 190468.0625 +err_fin 136425.984375 +sparsity check 0.49998772144317627 +time 74.99 +71 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 63572.9140625 +err_fin 53767.1953125 +sparsity check 0.49999988079071045 +time 1.32 +71 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 80682.921875 +err_fin 74023.1640625 +sparsity check 0.49999988079071045 +time 1.33 +71 self_attn.o_proj +Pruning ... +0.49673882126808167 0.09673883020877838 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4295224405461973 0.9709505944546686 1.0 +err_prefin 30127.17578125 +err_fin 13263.951171875 +sparsity check 0.49673882126808167 +time 67.89 +71 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3005487.5 +err_fin 2112914.75 +sparsity check 0.49999999574252535 +time 137.85 +71 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2807423.5 +err_fin 1965394.0 +sparsity check 0.49999999574252535 +time 138.01 +71 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 381285.5 +err_fin 358692.25 +sparsity check 0.49999999574252535 +time 135.80 +72 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 263776.59375 +err_fin 188009.8125 +sparsity check 0.49998772144317627 +time 74.98 +72 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 80561.09375 +err_fin 70249.53125 +sparsity check 0.49999988079071045 +time 1.34 +72 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 92642.984375 +err_fin 85140.859375 +sparsity check 0.49999988079071045 +time 1.32 +72 self_attn.o_proj +Pruning ... +0.4999038428068161 0.09990385174751282 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4396413259491503 0.9709505944546686 1.0 +err_prefin 37416.44921875 +err_fin 18725.60546875 +sparsity check 0.4999038428068161 +time 67.84 +72 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3078066.0 +err_fin 2158296.0 +sparsity check 0.49999999574252535 +time 137.80 +72 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2895632.75 +err_fin 2021234.0 +sparsity check 0.49999999574252535 +time 138.13 +72 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 409946.15625 +err_fin 385314.8125 +sparsity check 0.49999999574252535 +time 135.74 +73 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 246956.09375 +err_fin 175104.59375 +sparsity check 0.49998772144317627 +time 74.97 +73 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 77215.15625 +err_fin 66814.109375 +sparsity check 0.49999988079071045 +time 1.33 +73 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 107636.7109375 +err_fin 97223.234375 +sparsity check 0.49999988079071045 +time 1.33 +73 self_attn.o_proj +Pruning ... +0.4999135881662369 0.0999135971069336 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4396722322722684 0.9709505944546686 1.0 +err_prefin 25167.322265625 +err_fin 12153.6572265625 +sparsity check 0.4999135881662369 +time 67.84 +73 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3152760.75 +err_fin 2202667.5 +sparsity check 0.49999999574252535 +time 137.82 +73 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2980890.0 +err_fin 2074600.75 +sparsity check 0.49999999574252535 +time 138.02 +73 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 451407.625 +err_fin 421831.125 +sparsity check 0.49999999574252535 +time 135.76 +74 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 227363.40625 +err_fin 159704.0625 +sparsity check 0.4999999850988388 +time 75.01 +74 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 70052.21875 +err_fin 58712.01171875 +sparsity check 0.49999988079071045 +time 1.32 +74 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 81947.59375 +err_fin 73279.21875 +sparsity check 0.49999988079071045 +time 1.32 +74 self_attn.o_proj +Pruning ... +0.49862484633922577 0.09862485527992249 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4355718584986665 0.9709505944546686 1.0 +err_prefin 49821.27734375 +err_fin 20287.7265625 +sparsity check 0.49862484633922577 +time 67.85 +74 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3228713.25 +err_fin 2229053.25 +sparsity check 0.49999999574252535 +time 137.83 +74 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3063793.0 +err_fin 2106208.5 +sparsity check 0.49999999574252535 +time 138.11 +74 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 496875.25 +err_fin 461544.6875 +sparsity check 0.49999999574252535 +time 135.76 +75 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 229521.375 +err_fin 159013.125 +sparsity check 0.4999999850988388 +time 74.99 +75 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 67270.984375 +err_fin 55461.5546875 +sparsity check 0.49999988079071045 +time 1.34 +75 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 96647.6875 +err_fin 85233.734375 +sparsity check 0.49999988079071045 +time 1.32 +75 self_attn.o_proj +Pruning ... +0.49730534851551056 0.09730535745620728 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4313456994326053 0.9709505944546686 1.0 +err_prefin 45578.48828125 +err_fin 18587.2421875 +sparsity check 0.49730534851551056 +time 67.86 +75 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3260017.0 +err_fin 2230485.75 +sparsity check 0.49999999574252535 +time 137.85 +75 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3095145.25 +err_fin 2107935.5 +sparsity check 0.49999999574252535 +time 138.03 +75 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 559767.375 +err_fin 513296.1875 +sparsity check 0.49999999574252535 +time 135.77 +76 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 348435.8125 +err_fin 235325.90625 +sparsity check 0.4999999850988388 +time 74.99 +76 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 97760.9296875 +err_fin 80445.140625 +sparsity check 0.49999988079071045 +time 1.34 +76 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 143229.6875 +err_fin 125784.1484375 +sparsity check 0.49999988079071045 +time 1.31 +76 self_attn.o_proj +Pruning ... +0.489165797829628 0.08916580677032471 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4046298318294044 0.9709505944546686 1.0 +err_prefin 153192.640625 +err_fin 76294.2109375 +sparsity check 0.489165797829628 +time 67.90 +76 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3252784.0 +err_fin 2172913.0 +sparsity check 0.49999999574252535 +time 137.81 +76 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3067312.25 +err_fin 2038594.0 +sparsity check 0.49999999574252535 +time 138.11 +76 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 679483.9375 +err_fin 606641.75 +sparsity check 0.49999999574252535 +time 135.77 +77 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 232745.203125 +err_fin 151074.234375 +sparsity check 0.4999999850988388 +time 74.99 +77 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 71468.984375 +err_fin 56219.4765625 +sparsity check 0.49999988079071045 +time 1.33 +77 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 99336.578125 +err_fin 85069.78125 +sparsity check 0.49999988079071045 +time 1.31 +77 self_attn.o_proj +Pruning ... +0.4945366233587265 0.09453663229942322 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.422384499387892 0.9709505944546686 1.0 +err_prefin 61839.5625 +err_fin 21203.197265625 +sparsity check 0.4945366233587265 +time 67.84 +77 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3049443.0 +err_fin 1970115.0 +sparsity check 0.49999999574252535 +time 137.82 +77 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2879499.75 +err_fin 1849714.0 +sparsity check 0.49999999574252535 +time 138.07 +77 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 790908.0 +err_fin 679657.75 +sparsity check 0.49999999574252535 +time 135.75 +78 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 220484.484375 +err_fin 131148.265625 +sparsity check 0.4999999850988388 +time 74.97 +78 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 73585.7265625 +err_fin 55216.25390625 +sparsity check 0.49999988079071045 +time 1.33 +78 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 87354.7421875 +err_fin 75689.359375 +sparsity check 0.49999988079071045 +time 1.31 +78 self_attn.o_proj +Pruning ... +0.4979788661003113 0.097978875041008 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4335064294429443 0.9709505944546686 1.0 +err_prefin 42572.82421875 +err_fin 16020.3203125 +sparsity check 0.4979788661003113 +time 67.85 +78 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2573022.5 +err_fin 1585658.75 +sparsity check 0.49999999574252535 +time 137.82 +78 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2404934.5 +err_fin 1474085.5 +sparsity check 0.49999999574252535 +time 138.32 +78 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 857094.25 +err_fin 686390.6875 +sparsity check 0.49999999574252535 +time 135.81 +79 self_attn.q_proj +Pruning ... +0.49998800456523895 0.09998801350593567 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399081853751627 0.9709505944546686 1.0 +err_prefin 143236.46875 +err_fin 77749.5390625 +sparsity check 0.49998800456523895 +time 74.99 +79 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 54170.86328125 +err_fin 40600.9609375 +sparsity check 0.49999988079071045 +time 1.32 +79 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 39567.859375 +err_fin 31839.0 +sparsity check 0.49999988079071045 +time 1.32 +79 self_attn.o_proj +Pruning ... +0.49016299843788147 0.09016300737857819 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4079642697638912 0.9709505944546686 1.0 +err_prefin 12191.912109375 +err_fin 2890.40625 +sparsity check 0.49016299843788147 +time 67.90 +79 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1599801.125 +err_fin 894345.875 +sparsity check 0.49999999574252535 +time 137.84 +79 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1467271.75 +err_fin 819270.125 +sparsity check 0.49999999574252535 +time 138.02 +79 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 820401.125 +err_fin 514209.375 +sparsity check 0.49999999574252535 +time 135.80 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(4.6343e-06) +model.layers.0.self_attn.k_proj.weight tensor(0.0204) +model.layers.0.self_attn.v_proj.weight tensor(0.0536) +model.layers.0.self_attn.o_proj.weight tensor(3.9041e-06) +model.layers.0.mlp.gate_proj.weight tensor(2.7333e-06) +model.layers.0.mlp.up_proj.weight tensor(2.9206e-06) +model.layers.0.mlp.down_proj.weight tensor(0.0126) +50911.568996191025 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.673633 diff --git a/logs/llama2-70-0.5-no-final b/logs/llama2-70-0.5-no-final new file mode 100644 index 0000000..7f3557d --- /dev/null +++ b/logs/llama2-70-0.5-no-final @@ -0,0 +1,2897 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10.717611312866211 +time 74.34 +0 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21.521377563476562 +time 1.29 +0 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2.3604230880737305 +time 1.29 +0 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 0.3077867031097412 +time 66.99 +0 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232.090087890625 +time 132.95 +0 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232.13897705078125 +time 133.22 +0 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4.307583332061768 +time 132.33 +1 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 201.70103454589844 +time 74.12 +1 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 192.19281005859375 +time 1.31 +1 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17.129507064819336 +time 1.31 +1 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 9.993308067321777 +time 66.99 +1 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1592.91357421875 +time 132.49 +1 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1731.1697998046875 +time 132.92 +1 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 102.35027313232422 +time 132.15 +2 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 690.9444580078125 +time 74.06 +2 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 813.6088256835938 +time 1.31 +2 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 82.71668243408203 +time 1.31 +2 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 53.367958068847656 +time 67.04 +2 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 7192.265625 +time 132.52 +2 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 7540.5107421875 +time 132.90 +2 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 375.14044189453125 +time 132.14 +3 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5224.77783203125 +time 74.02 +3 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3596.468505859375 +time 1.30 +3 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1143.7545166015625 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 106.50784301757812 +time 66.93 +3 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 18111.9453125 +time 132.51 +3 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 18275.353515625 +time 132.93 +3 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 569.8212890625 +time 132.08 +4 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11148.994140625 +time 74.05 +4 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5542.4638671875 +time 1.30 +4 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2011.840576171875 +time 1.30 +4 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 131.37802124023438 +time 66.96 +4 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 32252.333984375 +time 132.55 +4 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 31786.78515625 +time 132.92 +4 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 882.1239013671875 +time 132.08 +5 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16082.5703125 +time 74.02 +5 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9010.7841796875 +time 1.30 +5 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2952.642578125 +time 1.30 +5 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 213.86009216308594 +time 66.93 +5 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 42302.0390625 +time 132.50 +5 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 41377.6875 +time 132.87 +5 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1255.13427734375 +time 132.12 +6 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18580.162109375 +time 74.03 +6 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10272.94140625 +time 1.32 +6 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3491.278564453125 +time 1.30 +6 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 417.36669921875 +time 66.99 +6 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 57757.77734375 +time 132.54 +6 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 56122.015625 +time 132.88 +6 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1871.470458984375 +time 132.04 +7 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29744.19140625 +time 74.01 +7 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15506.8740234375 +time 1.31 +7 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5035.0078125 +time 1.30 +7 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 506.8324279785156 +time 66.97 +7 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 72811.9140625 +time 132.51 +7 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 70573.25 +time 132.87 +7 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 2655.89404296875 +time 132.12 +8 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 26077.62109375 +time 74.05 +8 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13514.57421875 +time 1.30 +8 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4806.9443359375 +time 1.30 +8 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 844.914306640625 +time 66.97 +8 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 102421.359375 +time 132.53 +8 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 97549.1953125 +time 132.84 +8 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4729.75341796875 +time 132.09 +9 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29380.02734375 +time 74.00 +9 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15578.587890625 +time 1.31 +9 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7502.291015625 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 459.5536193847656 +time 66.93 +9 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 133887.84375 +time 132.51 +9 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 126507.9453125 +time 132.89 +9 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4669.8134765625 +time 132.08 +10 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17848.37890625 +time 74.01 +10 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7432.9658203125 +time 1.30 +10 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7279.8779296875 +time 1.30 +10 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 358.6090087890625 +time 66.98 +10 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164667.015625 +time 132.50 +10 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 154845.6875 +time 132.76 +10 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999701976776 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218706168299 0.9709505944546686 1.0 +err_prefin 5296.51416015625 +time 132.12 +11 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 28532.48046875 +time 74.02 +11 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15705.3203125 +time 1.31 +11 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8515.6044921875 +time 1.29 +11 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 568.9130859375 +time 66.97 +11 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 178439.65625 +time 132.51 +11 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 168405.421875 +time 132.92 +11 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 5870.9169921875 +time 132.11 +12 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 24391.189453125 +time 74.02 +12 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13878.55859375 +time 1.31 +12 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 6258.75048828125 +time 1.30 +12 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 705.231689453125 +time 66.98 +12 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 176686.15625 +time 132.53 +12 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 168170.15625 +time 132.90 +12 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 6499.34326171875 +time 132.08 +13 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 31191.0859375 +time 74.00 +13 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14700.6474609375 +time 1.31 +13 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8553.900390625 +time 1.30 +13 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1830.4453125 +time 66.89 +13 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 186636.21875 +time 132.50 +13 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 179507.25 +time 132.85 +13 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 8272.4453125 +time 132.07 +14 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50958.8984375 +time 74.04 +14 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25200.478515625 +time 1.31 +14 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11939.166015625 +time 1.30 +14 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2202.9189453125 +time 66.99 +14 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 217031.734375 +time 132.52 +14 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 208673.453125 +time 132.80 +14 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 8876.578125 +time 132.06 +15 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 37785.51171875 +time 74.02 +15 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17779.82421875 +time 1.32 +15 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14876.7451171875 +time 1.30 +15 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2147.2265625 +time 66.97 +15 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232639.15625 +time 132.53 +15 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 224543.671875 +time 132.91 +15 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 10591.8955078125 +time 132.05 +16 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36921.046875 +time 74.00 +16 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18139.66796875 +time 1.31 +16 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13697.25390625 +time 1.29 +16 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1860.264892578125 +time 66.96 +16 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 245963.125 +time 132.72 +16 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 238381.765625 +time 133.25 +16 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 10677.962890625 +time 132.36 +17 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47274.65234375 +time 74.00 +17 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 24148.25 +time 1.31 +17 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11666.39453125 +time 1.30 +17 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2883.78466796875 +time 66.96 +17 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 242515.671875 +time 132.74 +17 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 235619.421875 +time 133.17 +17 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 13573.5234375 +time 132.43 +18 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 44734.6328125 +time 73.99 +18 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21054.4296875 +time 1.31 +18 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18631.873046875 +time 1.30 +18 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2936.5966796875 +time 66.98 +18 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 269289.59375 +time 132.74 +18 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 259858.96875 +time 133.15 +18 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 14795.94921875 +time 132.43 +19 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 53659.69140625 +time 74.04 +19 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25075.787109375 +time 1.31 +19 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 22434.982421875 +time 1.31 +19 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2300.91943359375 +time 66.99 +19 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 292668.4375 +time 132.78 +19 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 282869.3125 +time 133.17 +19 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999701976776 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218706168299 0.9709505944546686 1.0 +err_prefin 15182.5849609375 +time 132.37 +20 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 48110.94921875 +time 73.99 +20 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21109.404296875 +time 1.31 +20 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18582.00390625 +time 1.30 +20 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1811.337890625 +time 66.96 +20 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 316026.875 +time 132.75 +20 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 306260.875 +time 133.16 +20 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 15475.8134765625 +time 132.43 +21 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30222.734375 +time 74.00 +21 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11362.373046875 +time 1.31 +21 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15775.4921875 +time 1.30 +21 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2444.572265625 +time 66.93 +21 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 330811.75 +time 132.74 +21 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 322009.84375 +time 133.14 +21 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 16136.716796875 +time 132.39 +22 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29106.16796875 +time 73.99 +22 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13556.794921875 +time 1.31 +22 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18032.66015625 +time 1.30 +22 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1689.82666015625 +time 66.96 +22 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 348179.09375 +time 132.73 +22 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 339720.15625 +time 133.06 +22 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 16580.63671875 +time 132.43 +23 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41801.40625 +time 73.99 +23 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21408.697265625 +time 1.30 +23 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18993.08203125 +time 1.30 +23 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2295.337646484375 +time 66.98 +23 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 358201.5 +time 132.72 +23 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 350123.40625 +time 133.19 +23 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 17124.4765625 +time 132.50 +24 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36886.6328125 +time 74.02 +24 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18703.375 +time 1.31 +24 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14053.79296875 +time 1.30 +24 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2117.28173828125 +time 66.97 +24 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 346989.6875 +time 132.65 +24 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 341473.03125 +time 132.92 +24 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 17269.02734375 +time 132.23 +25 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 42170.6015625 +time 74.23 +25 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21158.166015625 +time 1.31 +25 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14595.8388671875 +time 1.31 +25 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4943.6875 +time 67.18 +25 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 326379.40625 +time 133.18 +25 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 323482.0 +time 133.58 +25 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 23475.69921875 +time 132.68 +26 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 61465.19921875 +time 74.34 +26 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31137.2109375 +time 1.30 +26 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 22068.560546875 +time 1.30 +26 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5582.666015625 +time 67.28 +26 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 372075.3125 +time 133.29 +26 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 367292.0 +time 133.53 +26 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 23420.568359375 +time 132.82 +27 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 28320.3671875 +time 74.35 +27 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13523.166015625 +time 1.32 +27 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19238.0 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2411.788330078125 +time 67.33 +27 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 394936.0 +time 133.34 +27 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 389494.90625 +time 133.66 +27 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 24680.826171875 +time 132.75 +28 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50749.91796875 +time 74.33 +28 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 27150.32421875 +time 1.31 +28 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 28964.42578125 +time 1.31 +28 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3809.74755859375 +time 67.31 +28 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 416990.125 +time 133.28 +28 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 411412.4375 +time 133.67 +28 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 25919.30078125 +time 132.69 +29 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50315.03515625 +time 74.31 +29 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23235.5703125 +time 1.31 +29 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31637.900390625 +time 1.31 +29 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4623.9306640625 +time 67.29 +29 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 441865.75 +time 133.23 +29 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 434700.3125 +time 133.61 +29 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 27187.61328125 +time 132.87 +30 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 26189.08203125 +time 74.31 +30 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10043.103515625 +time 1.30 +30 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29211.3203125 +time 1.30 +30 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2231.01708984375 +time 67.23 +30 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 468689.40625 +time 133.22 +30 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 459814.4375 +time 133.69 +30 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 28129.546875 +time 132.72 +31 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 52598.1171875 +time 74.30 +31 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23865.44921875 +time 1.32 +31 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30826.552734375 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4350.78466796875 +time 67.24 +31 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 489889.4375 +time 133.22 +31 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 479781.78125 +time 133.63 +31 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 28902.79296875 +time 132.81 +32 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 59903.83203125 +time 74.33 +32 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29770.015625 +time 1.30 +32 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 28210.146484375 +time 1.31 +32 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3051.3134765625 +time 67.29 +32 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 501656.375 +time 133.26 +32 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 492198.875 +time 133.63 +32 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 30499.484375 +time 132.80 +33 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 84420.5390625 +time 74.33 +33 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39708.3359375 +time 1.32 +33 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30832.232421875 +time 1.31 +33 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6309.8017578125 +time 67.28 +33 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 506694.9375 +time 133.23 +33 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 498621.0625 +time 133.61 +33 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 34879.8671875 +time 132.80 +34 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30183.8125 +time 74.33 +34 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 12323.650390625 +time 1.32 +34 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18397.08203125 +time 1.30 +34 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4200.53369140625 +time 67.30 +34 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 522844.46875 +time 133.29 +34 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 514039.8125 +time 133.61 +34 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 37937.6640625 +time 132.73 +35 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50034.2109375 +time 74.31 +35 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20875.783203125 +time 1.31 +35 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25280.05859375 +time 1.31 +35 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5350.72900390625 +time 67.26 +35 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 554782.75 +time 133.22 +35 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 545162.75 +time 133.58 +35 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 40659.3515625 +time 132.74 +36 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 48221.203125 +time 74.33 +36 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20184.0546875 +time 1.31 +36 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 24801.09765625 +time 1.31 +36 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3293.978759765625 +time 67.29 +36 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 567511.875 +time 133.26 +36 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 557947.1875 +time 133.73 +36 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 45245.12109375 +time 132.42 +37 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 75363.3671875 +time 74.31 +37 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34463.5625 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31608.99609375 +time 1.30 +37 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6403.7568359375 +time 67.27 +37 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 591796.4375 +time 133.23 +37 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 582373.5625 +time 133.56 +37 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 49221.75390625 +time 132.79 +38 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 84602.09375 +time 74.30 +38 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38396.80078125 +time 1.31 +38 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 32711.265625 +time 1.31 +38 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10000.1728515625 +time 67.24 +38 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 608665.5 +time 133.21 +38 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 599951.25 +time 133.62 +38 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 55195.0625 +time 132.88 +39 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 103231.5703125 +time 74.33 +39 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 51982.09375 +time 1.31 +39 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35540.984375 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 12025.228515625 +time 67.24 +39 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 613712.875 +time 133.23 +39 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 605436.125 +time 133.58 +39 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 67558.8984375 +time 132.79 +40 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 96430.1875 +time 74.29 +40 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 45236.8125 +time 1.31 +40 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40018.015625 +time 1.31 +40 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17218.5234375 +time 67.25 +40 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 636673.875 +time 133.22 +40 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 628154.125 +time 133.55 +40 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 76410.84375 +time 132.73 +41 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 77917.921875 +time 74.32 +41 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35050.51953125 +time 1.33 +41 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35219.1171875 +time 1.32 +41 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18799.58984375 +time 67.24 +41 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 663693.875 +time 133.22 +41 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 643667.875 +time 133.58 +41 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 89795.3203125 +time 132.77 +42 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 89954.3125 +time 74.34 +42 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38151.453125 +time 1.31 +42 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43517.09375 +time 1.30 +42 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17671.365234375 +time 67.29 +42 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 716675.5 +time 133.27 +42 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 679881.8125 +time 133.63 +42 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 97907.234375 +time 132.74 +43 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 62594.09375 +time 74.32 +43 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30825.98828125 +time 1.32 +43 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33883.3828125 +time 1.31 +43 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 12448.8505859375 +time 67.25 +43 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 744831.6875 +time 133.21 +43 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 699455.75 +time 133.56 +43 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 108464.546875 +time 132.79 +44 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 113070.2578125 +time 74.27 +44 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40785.4375 +time 1.32 +44 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53236.5546875 +time 1.31 +44 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29389.97265625 +time 67.17 +44 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 816565.25 +time 133.19 +44 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 742064.5625 +time 133.56 +44 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 127401.671875 +time 132.59 +45 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 70641.8984375 +time 74.20 +45 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 32233.82421875 +time 1.30 +45 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 51841.7265625 +time 1.30 +45 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8924.384765625 +time 67.08 +45 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 885152.25 +time 132.92 +45 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 794598.5 +time 133.24 +45 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 131311.15625 +time 132.44 +46 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 60269.14453125 +time 74.16 +46 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26353.34765625 +time 1.31 +46 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44948.98828125 +time 1.31 +46 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11645.3173828125 +time 67.17 +46 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 951803.1875 +time 132.93 +46 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 841496.25 +time 133.29 +46 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 131733.21875 +time 132.53 +47 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 81897.6953125 +time 74.15 +47 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37858.46875 +time 1.32 +47 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43521.9765625 +time 1.31 +47 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18492.826171875 +time 67.12 +47 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 987205.75 +time 132.90 +47 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 861385.25 +time 133.29 +47 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148283.625 +time 132.50 +48 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41700.375 +time 74.16 +48 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14080.6494140625 +time 1.31 +48 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43042.390625 +time 1.30 +48 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 12309.330078125 +time 67.13 +48 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1033761.25 +time 132.91 +48 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 897758.25 +time 133.28 +48 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148257.171875 +time 132.48 +49 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23691.1640625 +time 74.20 +49 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9239.4814453125 +time 1.32 +49 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34483.1796875 +time 1.32 +49 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4694.615234375 +time 67.10 +49 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1075857.125 +time 132.94 +49 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 933454.375 +time 133.26 +49 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148576.234375 +time 132.40 +50 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 37414.390625 +time 74.24 +50 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 12605.123046875 +time 1.32 +50 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44598.6484375 +time 1.32 +50 self_attn.o_proj +Pruning ... +0.4999999552965164 0.15999996662139893 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591281689242464 0.9709505944546686 1.0 +err_prefin 8907.14453125 +time 67.17 +50 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1118057.5 +time 132.93 +50 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 966272.375 +time 133.41 +50 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 149397.03125 +time 132.36 +51 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47924.5 +time 74.20 +51 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19335.56640625 +time 1.32 +51 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 46904.40625 +time 1.31 +51 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8192.44140625 +time 67.14 +51 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1150105.0 +time 132.93 +51 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 996269.5 +time 133.29 +51 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 149764.0 +time 132.40 +52 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 81044.359375 +time 74.25 +52 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25878.015625 +time 1.32 +52 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53787.68359375 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16396.55859375 +time 67.15 +52 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1182271.0 +time 132.93 +52 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1023028.875 +time 133.21 +52 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 153306.609375 +time 132.50 +53 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 25691.26171875 +time 74.21 +53 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7557.69873046875 +time 1.32 +53 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44611.39453125 +time 1.32 +53 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6824.3671875 +time 67.11 +53 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1213898.0 +time 132.92 +53 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1052797.5 +time 133.28 +53 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 155050.875 +time 132.36 +54 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30003.078125 +time 74.26 +54 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8660.60546875 +time 1.32 +54 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 46037.44140625 +time 1.31 +54 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7636.07763671875 +time 67.18 +54 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1244314.875 +time 132.94 +54 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1081664.75 +time 133.38 +54 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 155016.25 +time 132.36 +55 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 48759.3203125 +time 74.22 +55 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17041.306640625 +time 1.32 +55 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 59746.5390625 +time 1.31 +55 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14929.080078125 +time 67.12 +55 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1268392.625 +time 132.95 +55 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1110917.0 +time 133.27 +55 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 158858.25 +time 132.37 +56 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36526.109375 +time 74.24 +56 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13544.583984375 +time 1.32 +56 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 42510.93359375 +time 1.32 +56 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8095.60400390625 +time 67.19 +56 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1289659.125 +time 132.90 +56 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1132746.5 +time 133.39 +56 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 161251.5 +time 132.35 +57 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 33041.51171875 +time 74.19 +57 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10192.71875 +time 1.31 +57 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 54615.609375 +time 1.30 +57 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8536.6806640625 +time 67.14 +57 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1332029.5 +time 132.93 +57 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1171135.75 +time 133.24 +57 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164085.640625 +time 132.37 +58 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18912.63671875 +time 74.16 +58 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5332.2275390625 +time 1.31 +58 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39178.5859375 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5707.21826171875 +time 67.13 +58 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1361064.0 +time 132.92 +58 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1199729.75 +time 133.21 +58 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 165043.921875 +time 132.43 +59 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23990.17578125 +time 74.21 +59 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 6799.7958984375 +time 1.31 +59 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43409.921875 +time 1.31 +59 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5401.51904296875 +time 67.13 +59 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1394133.25 +time 132.93 +59 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1230493.375 +time 133.23 +59 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 165187.59375 +time 132.34 +60 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4144.20751953125 +time 74.20 +60 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1084.0653076171875 +time 1.31 +60 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23588.876953125 +time 1.31 +60 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3300.5615234375 +time 67.14 +60 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1415199.75 +time 132.91 +60 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1255729.75 +time 133.17 +60 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164511.515625 +time 132.44 +61 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15355.552734375 +time 74.20 +61 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4016.667724609375 +time 1.31 +61 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40092.5703125 +time 1.30 +61 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5419.4111328125 +time 67.12 +61 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1442236.375 +time 132.93 +61 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1285856.25 +time 133.31 +61 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 165382.53125 +time 132.06 +62 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16637.373046875 +time 74.18 +62 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4831.4716796875 +time 1.31 +62 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43328.0859375 +time 1.31 +62 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4547.39990234375 +time 67.11 +62 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1474178.75 +time 132.90 +62 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1319586.25 +time 133.13 +62 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 167112.3125 +time 132.40 +63 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15147.34765625 +time 74.16 +63 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3676.107177734375 +time 1.30 +63 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33171.86328125 +time 1.30 +63 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3524.91455078125 +time 67.07 +63 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1508298.375 +time 132.94 +63 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1352186.375 +time 133.39 +63 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 169606.328125 +time 132.42 +64 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 27880.34375 +time 74.15 +64 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8370.90625 +time 1.31 +64 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 49190.6015625 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7736.96044921875 +time 67.11 +64 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1538812.625 +time 132.95 +64 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1384683.375 +time 133.30 +64 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 172562.8125 +time 132.45 +65 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7831.9677734375 +time 74.14 +65 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1802.909912109375 +time 1.31 +65 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26999.32421875 +time 1.30 +65 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3169.1650390625 +time 67.08 +65 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1565351.0 +time 132.92 +65 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1415175.0 +time 133.24 +65 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 175581.546875 +time 132.38 +66 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 13420.857421875 +time 74.15 +66 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3411.757568359375 +time 1.29 +66 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35757.55859375 +time 1.29 +66 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6590.640625 +time 67.12 +66 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1602629.75 +time 132.93 +66 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1457893.25 +time 133.30 +66 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999701976776 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218706168299 0.9709505944546686 1.0 +err_prefin 183895.8125 +time 132.47 +67 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6356.85546875 +time 74.18 +67 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1641.073974609375 +time 1.32 +67 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14184.7353515625 +time 1.30 +67 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1797.1361083984375 +time 67.14 +67 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1619232.625 +time 132.93 +67 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1483684.5 +time 132.94 +67 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 185674.203125 +time 132.42 +68 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 51455.2265625 +time 74.16 +68 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19140.79296875 +time 1.30 +68 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 71009.984375 +time 1.31 +68 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7044.2236328125 +time 67.10 +68 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1677991.75 +time 132.91 +68 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1545929.75 +time 133.36 +68 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 198817.625 +time 132.33 +69 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 86279.734375 +time 74.18 +69 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44683.25 +time 1.31 +69 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 93197.8984375 +time 1.30 +69 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11332.5498046875 +time 67.12 +69 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1709753.375 +time 132.93 +69 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1592125.25 +time 133.28 +69 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 208887.15625 +time 132.43 +70 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 49584.6171875 +time 74.13 +70 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19303.70703125 +time 1.31 +70 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53245.4921875 +time 1.30 +70 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8677.44140625 +time 67.11 +70 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1748143.75 +time 132.88 +70 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1638158.5 +time 133.25 +70 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 223469.34375 +time 132.44 +71 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 67144.390625 +time 74.13 +71 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33651.921875 +time 1.31 +71 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 57274.30078125 +time 1.30 +71 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11107.5009765625 +time 67.09 +71 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1803989.25 +time 132.92 +71 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1694213.25 +time 133.15 +71 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 236803.890625 +time 132.49 +72 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 93249.4453125 +time 74.11 +72 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 48147.8125 +time 1.30 +72 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 66679.4140625 +time 1.30 +72 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14145.203125 +time 67.07 +72 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1846660.75 +time 132.88 +72 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1744824.75 +time 133.25 +72 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 254743.875 +time 132.37 +73 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 86651.828125 +time 74.13 +73 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 47317.703125 +time 1.30 +73 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 76097.953125 +time 1.29 +73 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 9291.65625 +time 67.05 +73 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1890848.875 +time 132.91 +73 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1795729.0 +time 133.20 +73 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 280777.6875 +time 132.30 +74 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 79523.671875 +time 74.14 +74 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38743.5703125 +time 1.31 +74 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 57649.984375 +time 1.31 +74 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18443.35546875 +time 67.11 +74 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1933389.5 +time 132.96 +74 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1842457.0 +time 133.33 +74 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 308752.71875 +time 132.40 +75 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 79962.3125 +time 74.14 +75 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38553.9453125 +time 1.31 +75 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 64704.87109375 +time 1.30 +75 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16449.65625 +time 67.11 +75 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1947772.75 +time 132.94 +75 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1854193.25 +time 133.29 +75 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 347022.1875 +time 132.32 +76 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 119896.9375 +time 74.09 +76 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 55633.9375 +time 1.31 +76 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 101904.0 +time 1.29 +76 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 59548.59765625 +time 67.11 +76 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1932840.0 +time 132.90 +76 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1824479.0 +time 133.25 +76 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 418164.375 +time 132.42 +77 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 78987.625 +time 74.10 +77 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39827.34765625 +time 1.32 +77 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 68742.625 +time 1.30 +77 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23459.712890625 +time 67.09 +77 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1798901.625 +time 132.93 +77 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1700200.5 +time 133.23 +77 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 480343.46875 +time 132.36 +78 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 70369.5625 +time 74.11 +78 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37082.4140625 +time 1.32 +78 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 61058.3046875 +time 1.29 +78 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14321.369140625 +time 67.13 +78 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1499885.5 +time 132.96 +78 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1401595.5 +time 133.21 +78 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 511604.9375 +time 132.48 +79 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 44149.421875 +time 74.07 +79 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29432.83984375 +time 1.30 +79 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26794.783203125 +time 1.29 +79 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3699.406494140625 +time 67.04 +79 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 915986.1875 +time 132.93 +79 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 836133.75 +time 133.28 +79 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 451949.9375 +time 132.50 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0083) +model.layers.0.self_attn.k_proj.weight tensor(0.0117) +model.layers.0.self_attn.v_proj.weight tensor(0.0441) +model.layers.0.self_attn.o_proj.weight tensor(3.6061e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0047) +49773.809720277786 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.448766 diff --git a/logs/llama2-70-0.6 b/logs/llama2-70-0.6 new file mode 100644 index 0000000..15e7928 --- /dev/null +++ b/logs/llama2-70-0.6 @@ -0,0 +1,4017 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17.612327575683594 +err_fin 7.10721492767334 +sparsity check 0.3999999612569809 +time 76.26 +0 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 31.51104736328125 +err_fin 10.96957015991211 +sparsity check 0.399999737739563 +time 1.34 +0 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 5.1145734786987305 +err_fin 2.7964329719543457 +sparsity check 0.399999737739563 +time 1.34 +0 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 0.7482728362083435 +err_fin 0.04435748606920242 +sparsity check 0.3999999612569809 +time 68.96 +0 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 472.1926574707031 +err_fin 168.95367431640625 +sparsity check 0.399999988930566 +time 138.68 +0 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 473.89642333984375 +err_fin 168.148193359375 +sparsity check 0.399999988930566 +time 139.05 +0 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 8.996854782104492 +err_fin 4.284056663513184 +sparsity check 0.399999988930566 +time 137.01 +1 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 324.388916015625 +err_fin 104.90257263183594 +sparsity check 0.3999999612569809 +time 75.98 +1 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 333.74029541015625 +err_fin 120.40553283691406 +sparsity check 0.399999737739563 +time 1.33 +1 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36.60847473144531 +err_fin 19.74991226196289 +sparsity check 0.399999737739563 +time 1.32 +1 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 25.67422866821289 +err_fin 4.771961688995361 +sparsity check 0.3999999612569809 +time 68.78 +1 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3332.1962890625 +err_fin 1083.202392578125 +sparsity check 0.399999988930566 +time 138.79 +1 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3651.33837890625 +err_fin 1152.73779296875 +sparsity check 0.399999988930566 +time 139.22 +1 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 204.66514587402344 +err_fin 161.40135192871094 +sparsity check 0.399999988930566 +time 136.72 +2 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1261.651123046875 +err_fin 597.3677978515625 +sparsity check 0.3999999612569809 +time 76.00 +2 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 1407.54248046875 +err_fin 703.7958984375 +sparsity check 0.399999737739563 +time 1.60 +2 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 172.35546875 +err_fin 115.82382202148438 +sparsity check 0.399999737739563 +time 1.33 +2 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 124.26980590820312 +err_fin 40.466644287109375 +sparsity check 0.3999999612569809 +time 68.89 +2 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 15045.37890625 +err_fin 6529.49267578125 +sparsity check 0.399999988930566 +time 138.69 +2 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 15851.59375 +err_fin 6701.36474609375 +sparsity check 0.399999988930566 +time 139.19 +2 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 745.590087890625 +err_fin 629.763671875 +sparsity check 0.399999988930566 +time 137.04 +3 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10852.21484375 +err_fin 6075.66943359375 +sparsity check 0.3999999612569809 +time 75.88 +3 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7197.013671875 +err_fin 4788.529296875 +sparsity check 0.399999737739563 +time 1.34 +3 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 2397.6689453125 +err_fin 1744.8912353515625 +sparsity check 0.399999737739563 +time 1.33 +3 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 242.5241241455078 +err_fin 114.71422576904297 +sparsity check 0.3999999612569809 +time 68.77 +3 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 37562.2109375 +err_fin 19615.8828125 +sparsity check 0.399999988930566 +time 138.70 +3 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 38100.703125 +err_fin 19501.314453125 +sparsity check 0.399999988930566 +time 138.94 +3 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1130.3961181640625 +err_fin 995.3624877929688 +sparsity check 0.399999988930566 +time 136.96 +4 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 22947.802734375 +err_fin 13750.658203125 +sparsity check 0.3999999612569809 +time 75.92 +4 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 10913.3994140625 +err_fin 7686.8955078125 +sparsity check 0.399999737739563 +time 1.60 +4 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 4031.020263671875 +err_fin 3118.88232421875 +sparsity check 0.399999737739563 +time 1.34 +4 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 293.3388671875 +err_fin 147.85208129882812 +sparsity check 0.3999999612569809 +time 68.83 +4 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 66397.1015625 +err_fin 38667.984375 +sparsity check 0.399999988930566 +time 139.03 +4 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 65667.421875 +err_fin 37706.22265625 +sparsity check 0.399999988930566 +time 139.43 +4 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1737.755615234375 +err_fin 1555.261962890625 +sparsity check 0.399999988930566 +time 137.11 +5 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 34087.51171875 +err_fin 21938.572265625 +sparsity check 0.3999999612569809 +time 75.87 +5 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 17835.97265625 +err_fin 13241.9794921875 +sparsity check 0.399999737739563 +time 1.34 +5 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 5898.74365234375 +err_fin 4773.126953125 +sparsity check 0.399999737739563 +time 1.34 +5 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 455.9298400878906 +err_fin 220.78146362304688 +sparsity check 0.3999999612569809 +time 68.79 +5 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 85977.9140625 +err_fin 53347.79296875 +sparsity check 0.399999988930566 +time 138.90 +5 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 84495.609375 +err_fin 51809.59375 +sparsity check 0.399999988930566 +time 139.19 +5 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2430.72412109375 +err_fin 2164.45361328125 +sparsity check 0.399999988930566 +time 136.95 +6 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 39206.7578125 +err_fin 26081.8671875 +sparsity check 0.3999999612569809 +time 75.93 +6 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 20082.53515625 +err_fin 15431.42578125 +sparsity check 0.399999737739563 +time 1.34 +6 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 6980.0126953125 +err_fin 5785.09619140625 +sparsity check 0.399999737739563 +time 1.34 +6 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 895.5304565429688 +err_fin 499.7837829589844 +sparsity check 0.3999999612569809 +time 68.96 +6 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 116574.0 +err_fin 74599.0078125 +sparsity check 0.399999988930566 +time 138.91 +6 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 113771.078125 +err_fin 72131.609375 +sparsity check 0.399999988930566 +time 139.30 +6 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3635.968505859375 +err_fin 3275.612060546875 +sparsity check 0.399999988930566 +time 136.93 +7 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 63573.28515625 +err_fin 44168.1796875 +sparsity check 0.3999999612569809 +time 75.86 +7 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30488.50390625 +err_fin 24611.5703125 +sparsity check 0.399999737739563 +time 1.35 +7 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9955.16796875 +err_fin 8486.0693359375 +sparsity check 0.399999737739563 +time 1.34 +7 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1042.595947265625 +err_fin 554.978515625 +sparsity check 0.3999999612569809 +time 68.77 +7 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 146512.0 +err_fin 96909.8125 +sparsity check 0.399999988930566 +time 138.99 +7 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 142699.984375 +err_fin 93581.234375 +sparsity check 0.399999988930566 +time 139.29 +7 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 5133.5068359375 +err_fin 4621.236328125 +sparsity check 0.399999988930566 +time 137.02 +8 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 55519.2421875 +err_fin 39471.8671875 +sparsity check 0.3999999612569809 +time 75.89 +8 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26569.623046875 +err_fin 21154.78125 +sparsity check 0.399999737739563 +time 1.34 +8 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9536.263671875 +err_fin 8105.48583984375 +sparsity check 0.399999737739563 +time 1.34 +8 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1802.17236328125 +err_fin 924.8701171875 +sparsity check 0.3999999612569809 +time 68.79 +8 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 205404.890625 +err_fin 140618.84375 +sparsity check 0.399999988930566 +time 138.85 +8 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 196496.6875 +err_fin 133449.53125 +sparsity check 0.399999988930566 +time 139.16 +8 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 8890.8037109375 +err_fin 8498.9755859375 +sparsity check 0.399999988930566 +time 136.99 +9 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 62084.6328125 +err_fin 46541.3984375 +sparsity check 0.3999999612569809 +time 75.88 +9 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30079.623046875 +err_fin 25589.6484375 +sparsity check 0.399999737739563 +time 1.35 +9 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14775.11328125 +err_fin 13083.171875 +sparsity check 0.399999737739563 +time 1.33 +9 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1073.184326171875 +err_fin 521.8529663085938 +sparsity check 0.3999999612569809 +time 68.79 +9 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 267515.09375 +err_fin 189444.0 +sparsity check 0.399999988930566 +time 138.95 +9 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 254250.46875 +err_fin 178660.0625 +sparsity check 0.399999988930566 +time 139.31 +9 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 9258.24609375 +err_fin 8459.10546875 +sparsity check 0.399999988930566 +time 136.99 +10 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36642.8828125 +err_fin 27778.05078125 +sparsity check 0.3999999612569809 +time 75.87 +10 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14996.3779296875 +err_fin 12421.6953125 +sparsity check 0.399999737739563 +time 1.51 +10 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14389.373046875 +err_fin 12692.5615234375 +sparsity check 0.399999737739563 +time 1.32 +10 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 734.5894775390625 +err_fin 382.73675537109375 +sparsity check 0.3999999612569809 +time 68.80 +10 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 329132.5625 +err_fin 242656.875 +sparsity check 0.399999988930566 +time 138.84 +10 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 311480.8125 +err_fin 228053.5625 +sparsity check 0.399999988930566 +time 139.16 +10 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 10280.35546875 +err_fin 9502.23828125 +sparsity check 0.399999988930566 +time 136.95 +11 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59513.4375 +err_fin 45776.640625 +sparsity check 0.3999999612569809 +time 75.88 +11 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30693.546875 +err_fin 25851.52734375 +sparsity check 0.399999737739563 +time 1.35 +11 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16623.55078125 +err_fin 14633.64453125 +sparsity check 0.399999737739563 +time 1.33 +11 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1155.1929931640625 +err_fin 583.9664306640625 +sparsity check 0.3999999612569809 +time 68.86 +11 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 355986.09375 +err_fin 266137.0 +sparsity check 0.399999988930566 +time 138.91 +11 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 338211.9375 +err_fin 251343.90625 +sparsity check 0.399999988930566 +time 139.31 +11 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 11358.93359375 +err_fin 10526.056640625 +sparsity check 0.399999988930566 +time 136.99 +12 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 51019.9609375 +err_fin 39177.4921875 +sparsity check 0.3999999612569809 +time 75.86 +12 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26892.7421875 +err_fin 22232.98046875 +sparsity check 0.399999737739563 +time 1.34 +12 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 12268.361328125 +err_fin 10614.7900390625 +sparsity check 0.399999737739563 +time 1.34 +12 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1459.8062744140625 +err_fin 724.5511474609375 +sparsity check 0.3999999612569809 +time 68.85 +12 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 352586.125 +err_fin 264577.9375 +sparsity check 0.399999988930566 +time 138.52 +12 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 337908.65625 +err_fin 252293.359375 +sparsity check 0.399999988930566 +time 139.24 +12 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 12551.25 +err_fin 11571.958984375 +sparsity check 0.399999988930566 +time 136.99 +13 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 64947.30859375 +err_fin 50110.44140625 +sparsity check 0.3999999612569809 +time 75.89 +13 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29089.6328125 +err_fin 24156.20703125 +sparsity check 0.399999737739563 +time 1.35 +13 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16743.55859375 +err_fin 14742.77734375 +sparsity check 0.399999737739563 +time 1.34 +13 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3752.18212890625 +err_fin 2130.10791015625 +sparsity check 0.3999999612569809 +time 68.74 +13 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 372413.25 +err_fin 276258.03125 +sparsity check 0.399999988930566 +time 138.88 +13 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 360113.34375 +err_fin 265735.53125 +sparsity check 0.399999988930566 +time 139.38 +13 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 15820.6875 +err_fin 14581.4482421875 +sparsity check 0.399999988930566 +time 137.08 +14 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 108299.484375 +err_fin 84146.359375 +sparsity check 0.3999999612569809 +time 75.87 +14 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49305.0703125 +err_fin 41669.30078125 +sparsity check 0.399999737739563 +time 1.60 +14 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23381.0 +err_fin 20784.14453125 +sparsity check 0.399999737739563 +time 1.34 +14 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4317.4189453125 +err_fin 2290.507568359375 +sparsity check 0.3999999612569809 +time 68.84 +14 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 433300.75 +err_fin 328536.46875 +sparsity check 0.399999988930566 +time 138.93 +14 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 419087.53125 +err_fin 316094.875 +sparsity check 0.399999988930566 +time 139.28 +14 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 17131.1015625 +err_fin 15888.0390625 +sparsity check 0.399999988930566 +time 136.99 +15 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 78850.65625 +err_fin 62189.6796875 +sparsity check 0.3999999612569809 +time 75.85 +15 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 34686.796875 +err_fin 29638.791015625 +sparsity check 0.399999737739563 +time 1.34 +15 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29270.41015625 +err_fin 26060.15234375 +sparsity check 0.399999737739563 +time 1.34 +15 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4507.93359375 +err_fin 2499.28173828125 +sparsity check 0.3999999612569809 +time 68.86 +15 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 464740.5625 +err_fin 353674.90625 +sparsity check 0.399999988930566 +time 138.97 +15 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 451478.0625 +err_fin 341646.78125 +sparsity check 0.399999988930566 +time 139.25 +15 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 20231.583984375 +err_fin 18674.9375 +sparsity check 0.399999988930566 +time 137.06 +16 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 77341.2265625 +err_fin 61628.3671875 +sparsity check 0.3999999612569809 +time 75.87 +16 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 35855.0703125 +err_fin 30790.6484375 +sparsity check 0.399999737739563 +time 1.35 +16 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26911.302734375 +err_fin 24213.021484375 +sparsity check 0.399999737739563 +time 1.34 +16 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3601.39013671875 +err_fin 2004.331787109375 +sparsity check 0.3999999612569809 +time 68.81 +16 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 491135.875 +err_fin 378446.3125 +sparsity check 0.399999988930566 +time 138.86 +16 mlp.up_proj +Pruning ... +0.39999998467309134 0.2499999701976776 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1452538745832397 0.9709505944546686 1.0 +err_prefin 478407.0 +err_fin 367127.875 +sparsity check 0.39999998467309134 +time 139.24 +16 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 20346.81640625 +err_fin 18934.5859375 +sparsity check 0.399999988930566 +time 137.03 +17 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 99662.6875 +err_fin 78476.28125 +sparsity check 0.3999999612569809 +time 75.85 +17 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 46874.953125 +err_fin 39818.0390625 +sparsity check 0.399999737739563 +time 1.34 +17 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 22943.46875 +err_fin 20219.392578125 +sparsity check 0.399999737739563 +time 1.33 +17 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6161.6611328125 +err_fin 2948.42724609375 +sparsity check 0.3999999612569809 +time 68.78 +17 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 482081.9375 +err_fin 365778.3125 +sparsity check 0.399999988930566 +time 138.61 +17 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 471164.75 +err_fin 355606.4375 +sparsity check 0.399999988930566 +time 139.27 +17 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 26453.935546875 +err_fin 23957.19140625 +sparsity check 0.399999988930566 +time 136.96 +18 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 93042.890625 +err_fin 73869.5 +sparsity check 0.3999999612569809 +time 75.86 +18 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41653.34375 +err_fin 35923.51953125 +sparsity check 0.399999737739563 +time 1.35 +18 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36575.17578125 +err_fin 33034.24609375 +sparsity check 0.399999737739563 +time 1.33 +18 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6069.55908203125 +err_fin 3101.435302734375 +sparsity check 0.3999999612569809 +time 69.00 +18 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 533570.6875 +err_fin 407169.625 +sparsity check 0.399999988930566 +time 138.95 +18 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 518153.1875 +err_fin 393194.15625 +sparsity check 0.399999988930566 +time 138.80 +18 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 28997.03125 +err_fin 26308.62890625 +sparsity check 0.399999988930566 +time 136.91 +19 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 112864.234375 +err_fin 90097.75 +sparsity check 0.3999999612569809 +time 75.87 +19 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49256.0 +err_fin 43416.234375 +sparsity check 0.399999737739563 +time 1.58 +19 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 43989.640625 +err_fin 40220.83203125 +sparsity check 0.399999737739563 +time 1.33 +19 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4828.154296875 +err_fin 2734.393310546875 +sparsity check 0.3999999612569809 +time 68.76 +19 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 582081.125 +err_fin 448252.40625 +sparsity check 0.399999988930566 +time 138.88 +19 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 565899.625 +err_fin 433865.1875 +sparsity check 0.399999988930566 +time 139.30 +19 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 29079.490234375 +err_fin 26786.86328125 +sparsity check 0.399999988930566 +time 137.13 +20 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100119.140625 +err_fin 80946.3359375 +sparsity check 0.3999999612569809 +time 75.88 +20 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42183.84375 +err_fin 37111.1953125 +sparsity check 0.399999737739563 +time 1.34 +20 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36190.59765625 +err_fin 33309.046875 +sparsity check 0.399999737739563 +time 1.34 +20 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3647.30126953125 +err_fin 1786.774169921875 +sparsity check 0.3999999612569809 +time 68.82 +20 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 627969.375 +err_fin 487027.125 +sparsity check 0.399999988930566 +time 138.88 +20 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 611342.3125 +err_fin 471954.09375 +sparsity check 0.399999988930566 +time 139.23 +20 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 29551.892578125 +err_fin 27422.1015625 +sparsity check 0.399999988930566 +time 136.94 +21 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 61035.453125 +err_fin 49399.328125 +sparsity check 0.3999999612569809 +time 75.91 +21 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23059.736328125 +err_fin 19896.216796875 +sparsity check 0.399999737739563 +time 1.36 +21 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30783.630859375 +err_fin 27992.6484375 +sparsity check 0.399999737739563 +time 1.35 +21 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5241.9404296875 +err_fin 2848.85009765625 +sparsity check 0.3999999612569809 +time 68.77 +21 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 655856.75 +err_fin 509202.71875 +sparsity check 0.399999988930566 +time 138.90 +21 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 641574.125 +err_fin 495852.375 +sparsity check 0.399999988930566 +time 139.24 +21 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 31134.962890625 +err_fin 29017.095703125 +sparsity check 0.399999988930566 +time 136.99 +22 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 58833.91015625 +err_fin 47792.2109375 +sparsity check 0.3999999612569809 +time 75.88 +22 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26931.833984375 +err_fin 23634.6015625 +sparsity check 0.399999737739563 +time 1.34 +22 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 35195.21875 +err_fin 32346.142578125 +sparsity check 0.399999737739563 +time 1.34 +22 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3497.6494140625 +err_fin 1750.6719970703125 +sparsity check 0.3999999612569809 +time 68.94 +22 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 689108.0625 +err_fin 535977.375 +sparsity check 0.399999988930566 +time 138.90 +22 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 675351.75 +err_fin 523008.375 +sparsity check 0.399999988930566 +time 138.93 +22 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 31923.92578125 +err_fin 29884.91796875 +sparsity check 0.399999988930566 +time 136.99 +23 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 86296.59375 +err_fin 69640.4375 +sparsity check 0.3999999612569809 +time 75.90 +23 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42529.94140625 +err_fin 37186.515625 +sparsity check 0.399999737739563 +time 1.35 +23 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36995.01171875 +err_fin 33991.26953125 +sparsity check 0.399999737739563 +time 1.32 +23 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4822.01953125 +err_fin 2533.981689453125 +sparsity check 0.3999999612569809 +time 68.82 +23 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 707328.75 +err_fin 551112.6875 +sparsity check 0.399999988930566 +time 138.60 +23 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 694757.25 +err_fin 538941.1875 +sparsity check 0.399999988930566 +time 138.92 +23 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 33092.3984375 +err_fin 30912.99609375 +sparsity check 0.399999988930566 +time 136.92 +24 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 76317.9140625 +err_fin 61327.4609375 +sparsity check 0.3999999612569809 +time 75.85 +24 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37193.29296875 +err_fin 32230.69921875 +sparsity check 0.399999737739563 +time 1.34 +24 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27487.359375 +err_fin 24762.923828125 +sparsity check 0.399999737739563 +time 1.34 +24 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4445.7109375 +err_fin 2407.4091796875 +sparsity check 0.3999999612569809 +time 68.76 +24 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 686851.3125 +err_fin 536282.875 +sparsity check 0.399999988930566 +time 138.85 +24 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 678956.625 +err_fin 528484.25 +sparsity check 0.399999988930566 +time 138.90 +24 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 33274.2890625 +err_fin 31157.46484375 +sparsity check 0.399999988930566 +time 137.00 +25 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 86498.2265625 +err_fin 69604.9609375 +sparsity check 0.3999999612569809 +time 76.22 +25 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41965.0625 +err_fin 35911.8828125 +sparsity check 0.399999737739563 +time 1.59 +25 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 28663.859375 +err_fin 25574.87109375 +sparsity check 0.399999737739563 +time 1.35 +25 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11528.775390625 +err_fin 6194.421875 +sparsity check 0.3999999612569809 +time 69.13 +25 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 645389.6875 +err_fin 491102.8125 +sparsity check 0.399999988930566 +time 139.51 +25 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 642249.5625 +err_fin 486831.65625 +sparsity check 0.399999988930566 +time 139.87 +25 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 45862.3359375 +err_fin 42062.7734375 +sparsity check 0.399999988930566 +time 137.54 +26 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 128960.375 +err_fin 102998.5 +sparsity check 0.3999999612569809 +time 76.19 +26 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61280.0390625 +err_fin 53391.234375 +sparsity check 0.399999737739563 +time 1.36 +26 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 43186.40625 +err_fin 38870.4375 +sparsity check 0.399999737739563 +time 1.35 +26 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11384.138671875 +err_fin 6428.9443359375 +sparsity check 0.3999999612569809 +time 69.09 +26 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 738151.125 +err_fin 568121.5 +sparsity check 0.399999988930566 +time 139.56 +26 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 731963.625 +err_fin 560884.875 +sparsity check 0.399999988930566 +time 139.79 +26 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 45562.1796875 +err_fin 42632.48046875 +sparsity check 0.399999988930566 +time 137.47 +27 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 57331.1796875 +err_fin 46671.7265625 +sparsity check 0.3999999612569809 +time 76.23 +27 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27610.8828125 +err_fin 24122.611328125 +sparsity check 0.399999737739563 +time 1.36 +27 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37264.8125 +err_fin 34419.55078125 +sparsity check 0.399999737739563 +time 1.35 +27 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4909.48095703125 +err_fin 2435.118896484375 +sparsity check 0.3999999612569809 +time 69.12 +27 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 784413.625 +err_fin 607624.0 +sparsity check 0.399999988930566 +time 139.61 +27 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 776550.625 +err_fin 598907.0625 +sparsity check 0.399999988930566 +time 139.78 +27 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 47765.3046875 +err_fin 44880.6015625 +sparsity check 0.399999988930566 +time 137.58 +28 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 105145.84375 +err_fin 85558.984375 +sparsity check 0.3999999612569809 +time 76.22 +28 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 53635.23828125 +err_fin 47616.234375 +sparsity check 0.399999737739563 +time 1.35 +28 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 56563.3359375 +err_fin 52054.6796875 +sparsity check 0.399999737739563 +time 1.34 +28 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 8209.810546875 +err_fin 4685.265625 +sparsity check 0.3999999612569809 +time 69.12 +28 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 828677.625 +err_fin 646741.625 +sparsity check 0.399999988930566 +time 139.55 +28 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 820765.8125 +err_fin 637406.125 +sparsity check 0.399999988930566 +time 139.51 +28 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 50522.453125 +err_fin 47658.9921875 +sparsity check 0.399999988930566 +time 137.72 +29 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 103383.8203125 +err_fin 84504.015625 +sparsity check 0.3999999612569809 +time 76.23 +29 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 47313.234375 +err_fin 41837.9296875 +sparsity check 0.399999737739563 +time 1.34 +29 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61706.8125 +err_fin 56952.3203125 +sparsity check 0.399999737739563 +time 1.35 +29 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9612.470703125 +err_fin 5473.2080078125 +sparsity check 0.3999999612569809 +time 69.11 +29 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 877926.1875 +err_fin 688177.9375 +sparsity check 0.399999988930566 +time 139.71 +29 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 868305.0625 +err_fin 677128.125 +sparsity check 0.399999988930566 +time 140.03 +29 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 53114.984375 +err_fin 50312.1875 +sparsity check 0.399999988930566 +time 137.67 +30 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 52442.41015625 +err_fin 43001.96484375 +sparsity check 0.3999999612569809 +time 76.20 +30 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 19952.232421875 +err_fin 17613.3671875 +sparsity check 0.399999737739563 +time 1.36 +30 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 56886.75 +err_fin 52822.890625 +sparsity check 0.399999737739563 +time 1.34 +30 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4728.763671875 +err_fin 2648.462890625 +sparsity check 0.3999999612569809 +time 69.11 +30 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 932318.375 +err_fin 734010.1875 +sparsity check 0.399999988930566 +time 139.44 +30 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 918637.375 +err_fin 719568.6875 +sparsity check 0.399999988930566 +time 139.80 +30 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 54958.6796875 +err_fin 52124.546875 +sparsity check 0.399999988930566 +time 137.53 +31 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 108229.6796875 +err_fin 88773.578125 +sparsity check 0.3999999612569809 +time 76.20 +31 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 48303.6796875 +err_fin 42885.78515625 +sparsity check 0.399999737739563 +time 1.55 +31 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 60604.79296875 +err_fin 55323.4296875 +sparsity check 0.399999737739563 +time 1.36 +31 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9769.052734375 +err_fin 5960.439453125 +sparsity check 0.3999999612569809 +time 69.11 +31 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 973989.125 +err_fin 772843.5625 +sparsity check 0.399999988930566 +time 139.68 +31 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 957890.5625 +err_fin 756552.75 +sparsity check 0.399999988930566 +time 139.93 +31 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 56087.5 +err_fin 53318.90625 +sparsity check 0.399999988930566 +time 137.71 +32 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 125380.25 +err_fin 103287.515625 +sparsity check 0.3999999612569809 +time 76.21 +32 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 58345.3125 +err_fin 52415.5078125 +sparsity check 0.399999737739563 +time 1.36 +32 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 54671.87890625 +err_fin 50620.2890625 +sparsity check 0.399999737739563 +time 1.34 +32 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6489.8603515625 +err_fin 3653.179931640625 +sparsity check 0.3999999612569809 +time 69.26 +32 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 997853.0 +err_fin 789553.6875 +sparsity check 0.399999988930566 +time 139.64 +32 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 983279.6875 +err_fin 774578.5625 +sparsity check 0.399999988930566 +time 139.48 +32 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 59351.0078125 +err_fin 56297.09375 +sparsity check 0.399999988930566 +time 137.38 +33 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 178617.0625 +err_fin 145935.265625 +sparsity check 0.3999999612569809 +time 76.17 +33 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77837.015625 +err_fin 69017.921875 +sparsity check 0.399999737739563 +time 1.54 +33 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 60248.34375 +err_fin 55033.7421875 +sparsity check 0.399999737739563 +time 1.35 +33 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12679.162109375 +err_fin 7037.369140625 +sparsity check 0.3999999612569809 +time 69.08 +33 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1008519.125 +err_fin 788506.9375 +sparsity check 0.399999988930566 +time 139.55 +33 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 996504.875 +err_fin 775590.75 +sparsity check 0.399999988930566 +time 139.99 +33 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 67620.734375 +err_fin 63801.515625 +sparsity check 0.399999988930566 +time 137.68 +34 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 60917.2109375 +err_fin 49612.73046875 +sparsity check 0.3999999612569809 +time 76.20 +34 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 25374.015625 +err_fin 21585.5703125 +sparsity check 0.399999737739563 +time 1.55 +34 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 35895.7421875 +err_fin 32282.359375 +sparsity check 0.399999737739563 +time 1.34 +34 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 8630.1640625 +err_fin 4882.9326171875 +sparsity check 0.3999999612569809 +time 69.16 +34 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1040259.625 +err_fin 817221.8125 +sparsity check 0.399999988930566 +time 139.66 +34 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1027032.3125 +err_fin 803372.0 +sparsity check 0.399999988930566 +time 139.64 +34 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 73886.59375 +err_fin 70010.28125 +sparsity check 0.399999988930566 +time 137.40 +35 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104002.0 +err_fin 85301.984375 +sparsity check 0.3999999612569809 +time 76.16 +35 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41951.890625 +err_fin 36891.9296875 +sparsity check 0.399999737739563 +time 1.35 +35 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49134.27734375 +err_fin 45119.69140625 +sparsity check 0.399999737739563 +time 1.34 +35 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10896.05078125 +err_fin 5893.8515625 +sparsity check 0.3999999612569809 +time 69.19 +35 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1104395.25 +err_fin 870039.1875 +sparsity check 0.399999988930566 +time 139.63 +35 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1090234.75 +err_fin 854829.125 +sparsity check 0.399999988930566 +time 139.66 +35 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 79348.03125 +err_fin 75311.671875 +sparsity check 0.399999988930566 +time 137.72 +36 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 99297.703125 +err_fin 81572.9140625 +sparsity check 0.3999999612569809 +time 76.18 +36 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41102.11328125 +err_fin 36326.3125 +sparsity check 0.399999737739563 +time 1.34 +36 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 47730.203125 +err_fin 44085.8984375 +sparsity check 0.399999737739563 +time 1.33 +36 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6881.4716796875 +err_fin 3650.48974609375 +sparsity check 0.3999999612569809 +time 69.09 +36 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1129398.75 +err_fin 888594.625 +sparsity check 0.399999988930566 +time 139.26 +36 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1116524.375 +err_fin 874378.0 +sparsity check 0.399999988930566 +time 139.82 +36 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 88429.6796875 +err_fin 83697.421875 +sparsity check 0.399999988930566 +time 137.34 +37 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 158881.21875 +err_fin 129712.421875 +sparsity check 0.3999999612569809 +time 76.18 +37 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69050.1796875 +err_fin 61238.65625 +sparsity check 0.399999737739563 +time 1.35 +37 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61172.625 +err_fin 56369.76953125 +sparsity check 0.399999737739563 +time 1.36 +37 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12933.6025390625 +err_fin 6396.13232421875 +sparsity check 0.3999999612569809 +time 69.12 +37 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1179273.25 +err_fin 921774.0 +sparsity check 0.399999988930566 +time 139.63 +37 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1166277.25 +err_fin 907425.0 +sparsity check 0.399999988930566 +time 139.56 +37 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 96837.4765625 +err_fin 91560.8828125 +sparsity check 0.399999988930566 +time 137.60 +38 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 178652.953125 +err_fin 144669.875 +sparsity check 0.3999999612569809 +time 76.19 +38 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 76139.265625 +err_fin 66612.6875 +sparsity check 0.399999737739563 +time 1.54 +38 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 63959.2890625 +err_fin 57992.68359375 +sparsity check 0.399999737739563 +time 1.35 +38 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 20120.30078125 +err_fin 10196.248046875 +sparsity check 0.3999999612569809 +time 69.11 +38 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1214896.0 +err_fin 946970.75 +sparsity check 0.399999988930566 +time 139.59 +38 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1203742.375 +err_fin 934144.5 +sparsity check 0.399999988930566 +time 140.02 +38 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 108883.90625 +err_fin 102736.2578125 +sparsity check 0.399999988930566 +time 137.56 +39 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 218867.359375 +err_fin 176544.625 +sparsity check 0.3999999612569809 +time 76.22 +39 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 101190.484375 +err_fin 89251.7421875 +sparsity check 0.399999737739563 +time 1.54 +39 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69442.984375 +err_fin 63092.515625 +sparsity check 0.399999737739563 +time 1.36 +39 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24446.88671875 +err_fin 11920.4208984375 +sparsity check 0.3999999612569809 +time 69.10 +39 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1224954.75 +err_fin 943958.375 +sparsity check 0.399999988930566 +time 139.67 +39 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1215661.75 +err_fin 932604.3125 +sparsity check 0.399999988930566 +time 140.07 +39 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 132942.28125 +err_fin 123923.03125 +sparsity check 0.399999988930566 +time 137.73 +40 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 204391.59375 +err_fin 162348.875 +sparsity check 0.3999999612569809 +time 76.07 +40 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 87739.265625 +err_fin 77051.0703125 +sparsity check 0.399999737739563 +time 1.37 +40 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 79062.90625 +err_fin 70670.21875 +sparsity check 0.399999737739563 +time 1.35 +40 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36004.8515625 +err_fin 20871.0546875 +sparsity check 0.3999999612569809 +time 69.02 +40 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1269506.0 +err_fin 963220.25 +sparsity check 0.399999988930566 +time 139.36 +40 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1259459.25 +err_fin 950523.0625 +sparsity check 0.399999988930566 +time 139.79 +40 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 150879.421875 +err_fin 140773.90625 +sparsity check 0.399999988930566 +time 137.17 +41 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 164681.625 +err_fin 129744.1875 +sparsity check 0.3999999612569809 +time 76.05 +41 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 68598.9609375 +err_fin 59495.703125 +sparsity check 0.399999737739563 +time 1.36 +41 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69201.7265625 +err_fin 62056.1484375 +sparsity check 0.399999737739563 +time 1.34 +41 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 39444.0546875 +err_fin 20136.62890625 +sparsity check 0.3999999612569809 +time 68.96 +41 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1322639.75 +err_fin 983274.25 +sparsity check 0.399999988930566 +time 139.18 +41 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1289896.75 +err_fin 952170.625 +sparsity check 0.399999988930566 +time 139.55 +41 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 177016.734375 +err_fin 163459.8125 +sparsity check 0.399999988930566 +time 137.58 +42 self_attn.q_proj +Pruning ... +0.3999999463558197 0.15999996662139893 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497204719225 0.9709505944546686 1.0 +err_prefin 190901.5 +err_fin 148293.4375 +sparsity check 0.3999999463558197 +time 76.04 +42 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 74717.453125 +err_fin 64965.5390625 +sparsity check 0.399999737739563 +time 1.34 +42 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 85384.0625 +err_fin 77046.2265625 +sparsity check 0.399999737739563 +time 1.34 +42 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 35428.703125 +err_fin 19269.55078125 +sparsity check 0.3999999612569809 +time 68.99 +42 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1427233.25 +err_fin 1051851.5 +sparsity check 0.399999988930566 +time 139.40 +42 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1362717.5 +err_fin 997086.8125 +sparsity check 0.399999988930566 +time 139.50 +42 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 192777.53125 +err_fin 177533.296875 +sparsity check 0.399999988930566 +time 137.38 +43 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 131945.078125 +err_fin 102151.328125 +sparsity check 0.3999999612569809 +time 76.02 +43 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 60967.8359375 +err_fin 52683.7265625 +sparsity check 0.399999737739563 +time 1.34 +43 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 66663.3828125 +err_fin 58915.53125 +sparsity check 0.399999737739563 +time 1.33 +43 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 26005.5078125 +err_fin 15426.9609375 +sparsity check 0.3999999612569809 +time 68.93 +43 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1484124.125 +err_fin 1087662.0 +sparsity check 0.399999988930566 +time 139.29 +43 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1403686.25 +err_fin 1020918.5625 +sparsity check 0.399999988930566 +time 139.63 +43 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 213179.8125 +err_fin 195941.1875 +sparsity check 0.399999988930566 +time 137.40 +44 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 239021.375 +err_fin 183875.03125 +sparsity check 0.3999999612569809 +time 76.07 +44 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 78612.90625 +err_fin 69156.46875 +sparsity check 0.399999737739563 +time 1.36 +44 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 103914.4921875 +err_fin 94715.125 +sparsity check 0.399999737739563 +time 1.34 +44 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 64961.6015625 +err_fin 38213.140625 +sparsity check 0.3999999612569809 +time 69.04 +44 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1625958.0 +err_fin 1176143.25 +sparsity check 0.399999988930566 +time 138.99 +44 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1487921.75 +err_fin 1065628.0 +sparsity check 0.399999988930566 +time 139.33 +44 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 248835.6875 +err_fin 224877.890625 +sparsity check 0.399999988930566 +time 137.38 +45 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 149267.625 +err_fin 113006.1171875 +sparsity check 0.3999999612569809 +time 76.08 +45 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 64247.765625 +err_fin 55482.5546875 +sparsity check 0.399999737739563 +time 1.56 +45 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 101248.5 +err_fin 91964.640625 +sparsity check 0.399999737739563 +time 1.35 +45 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19453.009765625 +err_fin 11711.2490234375 +sparsity check 0.3999999612569809 +time 68.94 +45 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1761874.75 +err_fin 1274789.375 +sparsity check 0.399999988930566 +time 139.35 +45 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1593797.0 +err_fin 1140697.625 +sparsity check 0.399999988930566 +time 139.61 +45 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 257242.65625 +err_fin 232960.640625 +sparsity check 0.399999988930566 +time 137.42 +46 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 126243.0859375 +err_fin 95670.484375 +sparsity check 0.3999999612569809 +time 76.07 +46 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 51350.12109375 +err_fin 45076.40625 +sparsity check 0.399999737739563 +time 1.35 +46 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 87394.296875 +err_fin 80037.828125 +sparsity check 0.399999737739563 +time 1.34 +46 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 25115.828125 +err_fin 15321.5966796875 +sparsity check 0.3999999612569809 +time 69.00 +46 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1894083.375 +err_fin 1376017.5 +sparsity check 0.399999988930566 +time 139.33 +46 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1689111.25 +err_fin 1214059.25 +sparsity check 0.399999988930566 +time 139.33 +46 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 258656.53125 +err_fin 235855.484375 +sparsity check 0.399999988930566 +time 136.96 +47 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 172369.34375 +err_fin 131557.046875 +sparsity check 0.3999999612569809 +time 76.03 +47 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 74734.03125 +err_fin 65136.32421875 +sparsity check 0.399999737739563 +time 1.35 +47 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 85362.28125 +err_fin 77233.171875 +sparsity check 0.399999737739563 +time 1.34 +47 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 40525.57421875 +err_fin 19895.736328125 +sparsity check 0.3999999612569809 +time 68.99 +47 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1963942.25 +err_fin 1412017.5 +sparsity check 0.399999988930566 +time 139.35 +47 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1727577.875 +err_fin 1228149.125 +sparsity check 0.399999988930566 +time 139.30 +47 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 290418.71875 +err_fin 261644.28125 +sparsity check 0.399999988930566 +time 137.45 +48 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 85049.4375 +err_fin 64390.56640625 +sparsity check 0.3999999612569809 +time 76.02 +48 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 28543.0703125 +err_fin 24567.41015625 +sparsity check 0.399999737739563 +time 1.34 +48 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 83328.1875 +err_fin 75792.25 +sparsity check 0.399999737739563 +time 1.33 +48 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 26175.41796875 +err_fin 15631.171875 +sparsity check 0.3999999612569809 +time 68.92 +48 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2057025.5 +err_fin 1472035.75 +sparsity check 0.399999988930566 +time 139.17 +48 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1801691.125 +err_fin 1274622.875 +sparsity check 0.399999988930566 +time 139.25 +48 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 290596.0 +err_fin 263112.4375 +sparsity check 0.399999988930566 +time 137.02 +49 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 47123.171875 +err_fin 35557.078125 +sparsity check 0.3999999612569809 +time 76.01 +49 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 19124.44921875 +err_fin 16366.7763671875 +sparsity check 0.399999737739563 +time 1.54 +49 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 67332.96875 +err_fin 60125.21875 +sparsity check 0.399999737739563 +time 1.34 +49 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10219.5615234375 +err_fin 5539.33056640625 +sparsity check 0.3999999612569809 +time 68.93 +49 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2140271.75 +err_fin 1531680.75 +sparsity check 0.399999988930566 +time 139.09 +49 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1872370.875 +err_fin 1324871.875 +sparsity check 0.399999988930566 +time 139.61 +49 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 291743.5625 +err_fin 265395.375 +sparsity check 0.399999988930566 +time 137.14 +50 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 76211.046875 +err_fin 57330.80859375 +sparsity check 0.3999999612569809 +time 76.01 +50 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 25855.921875 +err_fin 22361.712890625 +sparsity check 0.399999737739563 +time 1.36 +50 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86781.0078125 +err_fin 77938.0703125 +sparsity check 0.399999737739563 +time 1.34 +50 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19982.37890625 +err_fin 11792.8759765625 +sparsity check 0.3999999612569809 +time 68.97 +50 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2224319.5 +err_fin 1593539.125 +sparsity check 0.399999988930566 +time 139.31 +50 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1938637.0 +err_fin 1373849.125 +sparsity check 0.399999988930566 +time 139.27 +50 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 293277.4375 +err_fin 267518.5625 +sparsity check 0.399999988930566 +time 137.13 +51 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 99279.4375 +err_fin 74759.390625 +sparsity check 0.3999999612569809 +time 76.04 +51 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38646.03125 +err_fin 33657.5546875 +sparsity check 0.399999737739563 +time 1.54 +51 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 90655.71875 +err_fin 82529.46875 +sparsity check 0.399999737739563 +time 1.35 +51 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17904.578125 +err_fin 9912.552734375 +sparsity check 0.3999999612569809 +time 68.92 +51 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2288922.75 +err_fin 1644780.375 +sparsity check 0.399999988930566 +time 139.32 +51 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1998986.0 +err_fin 1421125.75 +sparsity check 0.399999988930566 +time 139.34 +51 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 294801.0 +err_fin 269504.6875 +sparsity check 0.399999988930566 +time 137.07 +52 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 167389.796875 +err_fin 126759.140625 +sparsity check 0.3999999612569809 +time 76.02 +52 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 50601.26953125 +err_fin 44585.43359375 +sparsity check 0.399999737739563 +time 1.34 +52 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 103333.921875 +err_fin 93834.59375 +sparsity check 0.399999737739563 +time 1.33 +52 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 34392.21875 +err_fin 19500.4609375 +sparsity check 0.3999999612569809 +time 68.97 +52 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2348911.25 +err_fin 1694721.0 +sparsity check 0.399999988930566 +time 139.21 +52 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2048677.125 +err_fin 1462018.125 +sparsity check 0.399999988930566 +time 139.61 +52 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 300744.8125 +err_fin 275488.0625 +sparsity check 0.399999988930566 +time 137.40 +53 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 50624.0234375 +err_fin 38541.99609375 +sparsity check 0.3999999612569809 +time 76.05 +53 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 15209.3046875 +err_fin 13159.8466796875 +sparsity check 0.399999737739563 +time 1.36 +53 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86490.6875 +err_fin 78445.734375 +sparsity check 0.399999737739563 +time 1.34 +53 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 15345.724609375 +err_fin 8511.248046875 +sparsity check 0.3999999612569809 +time 69.03 +53 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2411984.5 +err_fin 1738109.25 +sparsity check 0.399999988930566 +time 139.27 +53 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2108468.0 +err_fin 1502701.0 +sparsity check 0.399999988930566 +time 139.70 +53 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 303820.53125 +err_fin 278497.3125 +sparsity check 0.399999988930566 +time 136.95 +54 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59636.765625 +err_fin 45276.53515625 +sparsity check 0.3999999612569809 +time 76.04 +54 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 17630.81640625 +err_fin 15097.21484375 +sparsity check 0.399999737739563 +time 1.36 +54 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 89247.875 +err_fin 81014.6015625 +sparsity check 0.399999737739563 +time 1.36 +54 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17046.90234375 +err_fin 9478.064453125 +sparsity check 0.3999999612569809 +time 69.01 +54 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2473427.25 +err_fin 1786240.75 +sparsity check 0.399999988930566 +time 139.28 +54 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2167564.75 +err_fin 1548563.5 +sparsity check 0.399999988930566 +time 139.63 +54 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 303654.15625 +err_fin 279146.78125 +sparsity check 0.399999988930566 +time 137.03 +55 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 97923.671875 +err_fin 74614.84375 +sparsity check 0.3999999612569809 +time 76.01 +55 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 34288.24609375 +err_fin 29903.81640625 +sparsity check 0.399999737739563 +time 1.36 +55 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 116934.5625 +err_fin 105758.9375 +sparsity check 0.399999737739563 +time 1.34 +55 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 34275.53515625 +err_fin 18865.6484375 +sparsity check 0.3999999612569809 +time 68.94 +55 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2522007.25 +err_fin 1825207.75 +sparsity check 0.399999988930566 +time 138.95 +55 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2227084.75 +err_fin 1594715.5 +sparsity check 0.399999988930566 +time 139.14 +55 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 310645.46875 +err_fin 286460.5 +sparsity check 0.399999988930566 +time 137.28 +56 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 73575.875 +err_fin 56280.72265625 +sparsity check 0.3999999612569809 +time 76.01 +56 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27285.4453125 +err_fin 23696.5234375 +sparsity check 0.399999737739563 +time 1.35 +56 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 82452.4453125 +err_fin 74864.8359375 +sparsity check 0.399999737739563 +time 1.34 +56 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17726.39453125 +err_fin 9315.02734375 +sparsity check 0.3999999612569809 +time 68.94 +56 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2563127.5 +err_fin 1853663.125 +sparsity check 0.399999988930566 +time 139.19 +56 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2268121.75 +err_fin 1623319.0 +sparsity check 0.399999988930566 +time 139.50 +56 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 314845.40625 +err_fin 290457.625 +sparsity check 0.399999988930566 +time 137.34 +57 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 65624.921875 +err_fin 49837.5703125 +sparsity check 0.3999999612569809 +time 76.11 +57 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 20926.7578125 +err_fin 18283.404296875 +sparsity check 0.399999737739563 +time 1.36 +57 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 106189.296875 +err_fin 96529.4375 +sparsity check 0.399999737739563 +time 1.35 +57 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 18766.31640625 +err_fin 10523.56640625 +sparsity check 0.3999999612569809 +time 68.92 +57 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2649966.0 +err_fin 1919384.75 +sparsity check 0.399999988930566 +time 139.30 +57 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2348224.25 +err_fin 1684120.25 +sparsity check 0.399999988930566 +time 139.40 +57 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 320348.4375 +err_fin 296307.125 +sparsity check 0.399999988930566 +time 137.00 +58 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36040.35546875 +err_fin 27597.578125 +sparsity check 0.3999999612569809 +time 76.09 +58 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 10517.08203125 +err_fin 8903.44140625 +sparsity check 0.399999737739563 +time 1.36 +58 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75836.828125 +err_fin 68298.859375 +sparsity check 0.399999737739563 +time 1.34 +58 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12667.3779296875 +err_fin 7178.7724609375 +sparsity check 0.3999999612569809 +time 68.97 +58 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2710278.25 +err_fin 1967515.5 +sparsity check 0.399999988930566 +time 139.29 +58 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2404047.75 +err_fin 1729631.125 +sparsity check 0.399999988930566 +time 139.36 +58 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 322225.75 +err_fin 298635.03125 +sparsity check 0.399999988930566 +time 137.06 +59 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 46747.12109375 +err_fin 35650.5703125 +sparsity check 0.3999999612569809 +time 76.05 +59 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 13351.0546875 +err_fin 11528.861328125 +sparsity check 0.399999737739563 +time 1.36 +59 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 84195.9375 +err_fin 75407.7421875 +sparsity check 0.399999737739563 +time 1.35 +59 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12003.314453125 +err_fin 6581.07958984375 +sparsity check 0.3999999612569809 +time 69.00 +59 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2774652.0 +err_fin 2019904.25 +sparsity check 0.399999988930566 +time 139.35 +59 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2467993.75 +err_fin 1779848.75 +sparsity check 0.399999988930566 +time 139.19 +59 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 322592.5625 +err_fin 299847.0 +sparsity check 0.399999988930566 +time 137.37 +60 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7381.94287109375 +err_fin 5601.66162109375 +sparsity check 0.3999999612569809 +time 76.11 +60 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 1925.58837890625 +err_fin 1598.54248046875 +sparsity check 0.399999737739563 +time 1.36 +60 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 45933.5 +err_fin 41093.85546875 +sparsity check 0.399999737739563 +time 1.34 +60 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7170.42529296875 +err_fin 4106.572265625 +sparsity check 0.3999999612569809 +time 68.98 +60 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2818281.25 +err_fin 2060159.5 +sparsity check 0.399999988930566 +time 139.37 +60 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2518651.75 +err_fin 1824972.25 +sparsity check 0.399999988930566 +time 139.48 +60 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 321096.96875 +err_fin 299192.5625 +sparsity check 0.399999988930566 +time 137.28 +61 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 29229.53515625 +err_fin 22513.853515625 +sparsity check 0.3999999612569809 +time 76.08 +61 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7946.28515625 +err_fin 6801.779296875 +sparsity check 0.399999737739563 +time 1.36 +61 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77985.5859375 +err_fin 70779.703125 +sparsity check 0.399999737739563 +time 1.35 +61 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11768.6376953125 +err_fin 6725.62255859375 +sparsity check 0.3999999612569809 +time 69.17 +61 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2873632.25 +err_fin 2103654.5 +sparsity check 0.399999988930566 +time 139.37 +61 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2576728.25 +err_fin 1871461.75 +sparsity check 0.399999988930566 +time 139.56 +61 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 322479.8125 +err_fin 301094.6875 +sparsity check 0.399999988930566 +time 137.41 +62 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 31340.5 +err_fin 24200.8046875 +sparsity check 0.3999999612569809 +time 76.05 +62 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9445.275390625 +err_fin 8100.333984375 +sparsity check 0.399999737739563 +time 1.35 +62 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 84320.0 +err_fin 75629.84375 +sparsity check 0.399999737739563 +time 1.35 +62 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9772.4111328125 +err_fin 4984.01513671875 +sparsity check 0.3999999612569809 +time 68.92 +62 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2936760.0 +err_fin 2158155.0 +sparsity check 0.399999988930566 +time 139.20 +62 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2647007.5 +err_fin 1928893.0 +sparsity check 0.399999988930566 +time 139.46 +62 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 325873.9375 +err_fin 304662.15625 +sparsity check 0.399999988930566 +time 137.08 +63 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 28726.470703125 +err_fin 22369.49609375 +sparsity check 0.3999999612569809 +time 76.04 +63 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7172.8134765625 +err_fin 6141.5068359375 +sparsity check 0.399999737739563 +time 1.36 +63 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 65172.89453125 +err_fin 57633.515625 +sparsity check 0.399999737739563 +time 1.34 +63 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7923.49365234375 +err_fin 4491.5146484375 +sparsity check 0.3999999612569809 +time 68.91 +63 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3004869.5 +err_fin 2214143.0 +sparsity check 0.399999988930566 +time 139.28 +63 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2710337.0 +err_fin 1982892.25 +sparsity check 0.399999988930566 +time 139.29 +63 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 330092.8125 +err_fin 309002.21875 +sparsity check 0.399999988930566 +time 137.39 +64 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 54454.6015625 +err_fin 42228.97265625 +sparsity check 0.3999999612569809 +time 76.02 +64 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16738.142578125 +err_fin 14420.96484375 +sparsity check 0.399999737739563 +time 1.37 +64 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 95227.5859375 +err_fin 86672.1171875 +sparsity check 0.399999737739563 +time 1.36 +64 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 16644.689453125 +err_fin 8670.3564453125 +sparsity check 0.3999999612569809 +time 68.92 +64 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3067880.0 +err_fin 2264808.0 +sparsity check 0.399999988930566 +time 139.25 +64 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2776968.5 +err_fin 2035704.0 +sparsity check 0.399999988930566 +time 139.33 +64 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 335391.875 +err_fin 314201.5 +sparsity check 0.399999988930566 +time 137.21 +65 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14516.8232421875 +err_fin 11307.5546875 +sparsity check 0.3999999612569809 +time 76.05 +65 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 3333.04736328125 +err_fin 2781.57666015625 +sparsity check 0.399999737739563 +time 1.36 +65 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 52414.85546875 +err_fin 46795.5859375 +sparsity check 0.399999737739563 +time 1.35 +65 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6793.7373046875 +err_fin 3684.34521484375 +sparsity check 0.3999999612569809 +time 68.92 +65 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3121126.25 +err_fin 2312601.5 +sparsity check 0.399999988930566 +time 138.97 +65 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2839029.0 +err_fin 2089455.75 +sparsity check 0.399999988930566 +time 139.24 +65 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 341135.375 +err_fin 320050.1875 +sparsity check 0.399999988930566 +time 137.38 +66 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24849.8046875 +err_fin 19324.787109375 +sparsity check 0.3999999612569809 +time 76.07 +66 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 6448.6796875 +err_fin 5454.6904296875 +sparsity check 0.399999737739563 +time 1.36 +66 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69806.2421875 +err_fin 63193.6640625 +sparsity check 0.399999737739563 +time 1.34 +66 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14557.0380859375 +err_fin 8636.525390625 +sparsity check 0.3999999612569809 +time 68.94 +66 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3198090.5 +err_fin 2368442.5 +sparsity check 0.399999988930566 +time 139.30 +66 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2925024.5 +err_fin 2152401.0 +sparsity check 0.399999988930566 +time 139.71 +66 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 356939.4375 +err_fin 334807.625 +sparsity check 0.399999988930566 +time 137.26 +67 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11913.30859375 +err_fin 9233.849609375 +sparsity check 0.3999999612569809 +time 76.02 +67 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 2945.35595703125 +err_fin 2397.56005859375 +sparsity check 0.399999737739563 +time 1.36 +67 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27783.22265625 +err_fin 24270.81640625 +sparsity check 0.399999737739563 +time 1.35 +67 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4088.1806640625 +err_fin 1924.853515625 +sparsity check 0.3999999612569809 +time 68.91 +67 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3231225.0 +err_fin 2397659.0 +sparsity check 0.399999988930566 +time 139.37 +67 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2977180.75 +err_fin 2195135.0 +sparsity check 0.399999988930566 +time 139.31 +67 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 360001.875 +err_fin 337752.40625 +sparsity check 0.399999988930566 +time 137.40 +68 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104838.203125 +err_fin 81429.46875 +sparsity check 0.3999999612569809 +time 76.05 +68 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38504.1484375 +err_fin 33724.22265625 +sparsity check 0.399999737739563 +time 1.35 +68 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 137444.734375 +err_fin 125453.953125 +sparsity check 0.399999737739563 +time 1.34 +68 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 15373.6875 +err_fin 8905.60546875 +sparsity check 0.3999999612569809 +time 68.97 +68 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3348988.5 +err_fin 2488614.0 +sparsity check 0.399999988930566 +time 139.36 +68 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3100822.5 +err_fin 2292061.0 +sparsity check 0.399999988930566 +time 139.35 +68 mlp.down_proj +Pruning ... +0.39999998467309134 0.2499999850988388 0.32857141750199453 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253876941607 0.9709505944546686 1.0 +err_prefin 384532.09375 +err_fin 360141.5625 +sparsity check 0.39999998467309134 +time 137.33 +69 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 179347.78125 +err_fin 139475.03125 +sparsity check 0.3999999612569809 +time 76.02 +69 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86929.484375 +err_fin 76924.859375 +sparsity check 0.399999737739563 +time 1.54 +69 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 180458.5 +err_fin 162700.0 +sparsity check 0.399999737739563 +time 1.34 +69 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24603.837890625 +err_fin 13488.77734375 +sparsity check 0.3999999612569809 +time 68.91 +69 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3413958.25 +err_fin 2536731.5 +sparsity check 0.399999988930566 +time 139.29 +69 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3193894.5 +err_fin 2361396.0 +sparsity check 0.399999988930566 +time 139.77 +69 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 403397.5 +err_fin 378025.625 +sparsity check 0.399999988930566 +time 137.02 +70 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100971.796875 +err_fin 78724.796875 +sparsity check 0.3999999612569809 +time 76.07 +70 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38836.046875 +err_fin 34014.4296875 +sparsity check 0.399999737739563 +time 1.55 +70 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 102763.703125 +err_fin 92557.21875 +sparsity check 0.399999737739563 +time 1.34 +70 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 18146.841796875 +err_fin 9677.0751953125 +sparsity check 0.3999999612569809 +time 68.98 +70 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3491533.0 +err_fin 2594608.0 +sparsity check 0.399999988930566 +time 139.01 +70 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3285514.25 +err_fin 2429184.5 +sparsity check 0.399999988930566 +time 139.71 +70 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 430509.3125 +err_fin 402992.625 +sparsity check 0.399999988930566 +time 137.35 +71 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 139824.796875 +err_fin 108807.4765625 +sparsity check 0.3999999612569809 +time 76.05 +71 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 65574.125 +err_fin 57730.90234375 +sparsity check 0.399999737739563 +time 1.54 +71 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 110519.40625 +err_fin 99854.078125 +sparsity check 0.399999737739563 +time 1.35 +71 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 23332.802734375 +err_fin 12481.970703125 +sparsity check 0.3999999612569809 +time 68.95 +71 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3605251.5 +err_fin 2672655.5 +sparsity check 0.399999988930566 +time 139.33 +71 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3398419.25 +err_fin 2508336.25 +sparsity check 0.399999988930566 +time 139.78 +71 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 455241.75 +err_fin 425527.34375 +sparsity check 0.399999988930566 +time 137.26 +72 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 196818.25 +err_fin 152508.28125 +sparsity check 0.3999999612569809 +time 76.06 +72 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 92007.8046875 +err_fin 81369.671875 +sparsity check 0.399999737739563 +time 1.53 +72 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 129737.375 +err_fin 117728.75 +sparsity check 0.399999737739563 +time 1.34 +72 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 30920.36328125 +err_fin 18197.14453125 +sparsity check 0.3999999612569809 +time 68.97 +72 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3690917.25 +err_fin 2729482.5 +sparsity check 0.399999988930566 +time 139.01 +72 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3501261.0 +err_fin 2578744.0 +sparsity check 0.399999988930566 +time 139.68 +72 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 489137.3125 +err_fin 456619.71875 +sparsity check 0.399999988930566 +time 137.34 +73 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 182977.8125 +err_fin 141293.25 +sparsity check 0.3999999612569809 +time 76.10 +73 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 89972.203125 +err_fin 79496.8515625 +sparsity check 0.399999737739563 +time 1.53 +73 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 147110.46875 +err_fin 131562.28125 +sparsity check 0.399999737739563 +time 1.35 +73 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19759.52734375 +err_fin 11627.740234375 +sparsity check 0.3999999612569809 +time 68.95 +73 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3779088.5 +err_fin 2786089.25 +sparsity check 0.399999988930566 +time 139.34 +73 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3602361.0 +err_fin 2646019.0 +sparsity check 0.399999988930566 +time 139.38 +73 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 537911.375 +err_fin 499562.375 +sparsity check 0.399999988930566 +time 136.98 +74 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 166243.046875 +err_fin 127981.9765625 +sparsity check 0.3999999612569809 +time 76.05 +74 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75013.25 +err_fin 65175.484375 +sparsity check 0.399999737739563 +time 1.36 +74 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 111847.640625 +err_fin 99155.484375 +sparsity check 0.399999737739563 +time 1.33 +74 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 40018.34375 +err_fin 20523.7578125 +sparsity check 0.3999999612569809 +time 69.04 +74 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3866103.0 +err_fin 2820464.75 +sparsity check 0.399999988930566 +time 139.43 +74 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3698074.0 +err_fin 2687355.5 +sparsity check 0.399999988930566 +time 139.73 +74 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 592491.375 +err_fin 546775.8125 +sparsity check 0.399999988930566 +time 137.35 +75 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 168544.46875 +err_fin 128167.65625 +sparsity check 0.3999999612569809 +time 76.06 +75 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 74026.578125 +err_fin 63848.19921875 +sparsity check 0.399999737739563 +time 1.35 +75 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 126137.78125 +err_fin 110095.375 +sparsity check 0.399999737739563 +time 1.35 +75 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36850.890625 +err_fin 19020.365234375 +sparsity check 0.3999999612569809 +time 68.95 +75 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3900661.0 +err_fin 2822843.0 +sparsity check 0.399999988930566 +time 139.36 +75 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3724726.5 +err_fin 2687188.0 +sparsity check 0.399999988930566 +time 139.40 +75 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 667397.5625 +err_fin 608322.125 +sparsity check 0.399999988930566 +time 137.07 +76 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 253750.90625 +err_fin 189531.53125 +sparsity check 0.3999999612569809 +time 76.02 +76 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 107933.15625 +err_fin 91723.34375 +sparsity check 0.399999737739563 +time 1.34 +76 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 202030.109375 +err_fin 173529.890625 +sparsity check 0.399999737739563 +time 1.33 +76 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 123403.4453125 +err_fin 70644.203125 +sparsity check 0.3999999612569809 +time 68.98 +76 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3866278.5 +err_fin 2741640.25 +sparsity check 0.399999988930566 +time 139.36 +76 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3663346.75 +err_fin 2588183.5 +sparsity check 0.399999988930566 +time 139.71 +76 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 803513.4375 +err_fin 713280.9375 +sparsity check 0.399999988930566 +time 137.34 +77 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 167097.28125 +err_fin 121912.25 +sparsity check 0.3999999612569809 +time 76.05 +77 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77177.921875 +err_fin 64188.6796875 +sparsity check 0.399999737739563 +time 1.55 +77 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 134594.03125 +err_fin 114464.7890625 +sparsity check 0.399999737739563 +time 1.34 +77 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 54597.484375 +err_fin 24432.23046875 +sparsity check 0.3999999612569809 +time 68.95 +77 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3596767.5 +err_fin 2479884.0 +sparsity check 0.399999988930566 +time 139.44 +77 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3412120.5 +err_fin 2341643.5 +sparsity check 0.399999988930566 +time 139.41 +77 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 920975.75 +err_fin 788917.125 +sparsity check 0.399999988930566 +time 137.28 +78 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 149312.1875 +err_fin 104054.328125 +sparsity check 0.3999999612569809 +time 76.06 +78 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 73333.828125 +err_fin 58532.171875 +sparsity check 0.399999737739563 +time 1.35 +78 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 119455.03125 +err_fin 100930.953125 +sparsity check 0.399999737739563 +time 1.34 +78 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 33042.1171875 +err_fin 16012.80078125 +sparsity check 0.3999999612569809 +time 69.15 +78 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3002931.5 +err_fin 1987053.25 +sparsity check 0.399999988930566 +time 139.52 +78 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2814261.0 +err_fin 1855245.25 +sparsity check 0.399999988930566 +time 139.48 +78 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 976734.5 +err_fin 781829.0 +sparsity check 0.399999988930566 +time 137.12 +79 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 93958.484375 +err_fin 60492.56640625 +sparsity check 0.3999999612569809 +time 76.09 +79 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 57846.0703125 +err_fin 44838.82421875 +sparsity check 0.399999737739563 +time 1.36 +79 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 52594.4296875 +err_fin 41885.23828125 +sparsity check 0.399999737739563 +time 1.35 +79 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9301.072265625 +err_fin 3125.864990234375 +sparsity check 0.3999999612569809 +time 68.94 +79 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1829445.375 +err_fin 1109003.875 +sparsity check 0.399999988930566 +time 139.53 +79 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1672239.25 +err_fin 1014265.0 +sparsity check 0.399999988930566 +time 139.53 +79 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 825539.125 +err_fin 553685.25 +sparsity check 0.399999988930566 +time 137.52 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0106) +model.layers.0.self_attn.k_proj.weight tensor(0.0180) +model.layers.0.self_attn.v_proj.weight tensor(0.0582) +model.layers.0.self_attn.o_proj.weight tensor(3.6061e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0088) +51508.91003680229 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.794084 diff --git a/logs/llama2-70-0.6-fix-mask b/logs/llama2-70-0.6-fix-mask new file mode 100644 index 0000000..dcd6773 --- /dev/null +++ b/logs/llama2-70-0.6-fix-mask @@ -0,0 +1,4020 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +model.layers.0.self_attn.q_proj.weight torch.Size([8192, 8192]) (8192, 8192) 0.1 +model.layers.0.self_attn.k_proj.weight torch.Size([1024, 8192]) (1024, 8192) 0.2 +model.layers.0.mlp.gate_proj.weight torch.Size([28672, 8192]) (8192, 28672) 0.2 +Ready. +0 self_attn.q_proj +Pruning ... +0.3900318294763565 0.09003184735774994 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3178669878329639 0.9709505944546686 1.0 +err_prefin 67.93980407714844 +err_fin 17.388469696044922 +sparsity check 0.3900318294763565 +time 75.10 +0 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 84.05683898925781 +err_fin 22.081422805786133 +sparsity check 0.39999985694885254 +time 1.33 +0 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 8.222892761230469 +err_fin 4.271332740783691 +sparsity check 0.39999985694885254 +time 1.31 +0 self_attn.o_proj +Pruning ... +0.3798350691795349 0.07983508706092834 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2828887199008383 0.9709505944546686 1.0 +err_prefin 3.1282148361206055 +err_fin 0.16622190177440643 +sparsity check 0.3798350691795349 +time 67.96 +0 mlp.gate_proj +Pruning ... +0.3999928278582437 0.1999748945236206 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337774124002955 0.9709505944546686 1.0 +err_prefin 1111.20751953125 +err_fin 339.8477783203125 +sparsity check 0.3999928278582437 +time 137.86 +0 mlp.up_proj +Pruning ... +0.3999928278582437 0.1999748945236206 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337774124002955 0.9709505944546686 1.0 +err_prefin 1123.2789306640625 +err_fin 339.15533447265625 +sparsity check 0.3999928278582437 +time 138.22 +0 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 22.08213233947754 +err_fin 9.71038818359375 +sparsity check 0.3999999931880406 +time 135.68 +1 self_attn.q_proj +Pruning ... +0.38066042959690094 0.08066044747829437 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2857929308096117 0.9709505944546686 1.0 +err_prefin 937.7511596679688 +err_fin 194.9318084716797 +sparsity check 0.38066042959690094 +time 75.08 +1 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 973.091064453125 +err_fin 250.7093505859375 +sparsity check 0.39999985694885254 +time 1.32 +1 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 56.17710876464844 +err_fin 28.279510498046875 +sparsity check 0.39999985694885254 +time 1.33 +1 self_attn.o_proj +Pruning ... +0.3773004561662674 0.07730047404766083 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2738859507762847 0.9709505944546686 1.0 +err_prefin 59.44793701171875 +err_fin 9.102714538574219 +sparsity check 0.3773004561662674 +time 67.95 +1 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 8944.666015625 +err_fin 2393.19287109375 +sparsity check 0.3999999931880406 +time 137.87 +1 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 9889.7255859375 +err_fin 2553.67919921875 +sparsity check 0.3999999931880406 +time 138.10 +1 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 369.8732604980469 +err_fin 291.06585693359375 +sparsity check 0.3999999931880406 +time 135.76 +2 self_attn.q_proj +Pruning ... +0.39730459451675415 0.09730461239814758 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.341683593222438 0.9709505944546686 1.0 +err_prefin 3279.197265625 +err_fin 1069.7049560546875 +sparsity check 0.39730459451675415 +time 75.04 +2 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 4669.1806640625 +err_fin 1998.608154296875 +sparsity check 0.39999985694885254 +time 1.31 +2 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 277.66925048828125 +err_fin 176.12515258789062 +sparsity check 0.39999985694885254 +time 1.30 +2 self_attn.o_proj +Pruning ... +0.39454004168510437 0.0945400595664978 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3327359593366572 0.9709505944546686 1.0 +err_prefin 300.2970275878906 +err_fin 87.72624206542969 +sparsity check 0.39454004168510437 +time 67.93 +2 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 35212.859375 +err_fin 13353.748046875 +sparsity check 0.3999999931880406 +time 137.86 +2 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 36606.16015625 +err_fin 13565.90234375 +sparsity check 0.3999999931880406 +time 138.25 +2 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1367.951416015625 +err_fin 1155.0523681640625 +sparsity check 0.3999999931880406 +time 135.80 +3 self_attn.q_proj +Pruning ... +0.39995184540748596 0.0999518632888794 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3501338626233381 0.9709505944546686 1.0 +err_prefin 29859.439453125 +err_fin 13811.650390625 +sparsity check 0.39995184540748596 +time 74.96 +3 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 20594.76953125 +err_fin 12368.8828125 +sparsity check 0.39999985694885254 +time 1.33 +3 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 3717.0654296875 +err_fin 2571.700439453125 +sparsity check 0.39999985694885254 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.39858680963516235 0.09858682751655579 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.345790746317014 0.9709505944546686 1.0 +err_prefin 584.930419921875 +err_fin 247.90924072265625 +sparsity check 0.39858680963516235 +time 67.84 +3 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 77081.234375 +err_fin 36747.4921875 +sparsity check 0.3999999931880406 +time 137.80 +3 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 77310.25 +err_fin 36238.3203125 +sparsity check 0.3999999931880406 +time 138.07 +3 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1846.95849609375 +err_fin 1632.4097900390625 +sparsity check 0.3999999931880406 +time 135.72 +4 self_attn.q_proj +Pruning ... +0.3999020755290985 0.09990209341049194 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.349976037564583 0.9709505944546686 1.0 +err_prefin 57675.33984375 +err_fin 29864.189453125 +sparsity check 0.3999020755290985 +time 74.97 +4 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 26593.50390625 +err_fin 16409.076171875 +sparsity check 0.39999985694885254 +time 1.33 +4 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 5928.640625 +err_fin 4469.638671875 +sparsity check 0.39999985694885254 +time 1.31 +4 self_attn.o_proj +Pruning ... +0.3970344215631485 0.09703443944454193 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3408147494946228 0.9709505944546686 1.0 +err_prefin 719.8106689453125 +err_fin 330.6018981933594 +sparsity check 0.3970344215631485 +time 67.85 +4 mlp.gate_proj +Pruning ... +0.39997968077659607 0.199928879737854 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337511125328974 0.9709505944546686 1.0 +err_prefin 126231.765625 +err_fin 68969.1875 +sparsity check 0.39997968077659607 +time 137.80 +4 mlp.up_proj +Pruning ... +0.3999303664479937 0.19975627958774567 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1336524138544812 0.9709505944546686 1.0 +err_prefin 123856.71875 +err_fin 66862.328125 +sparsity check 0.3999303664479937 +time 138.20 +4 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2812.97265625 +err_fin 2526.6923828125 +sparsity check 0.3999999931880406 +time 135.79 +5 self_attn.q_proj +Pruning ... +0.39997562766075134 0.09997564554214478 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502092644027002 0.9709505944546686 1.0 +err_prefin 91248.796875 +err_fin 52015.40625 +sparsity check 0.39997562766075134 +time 74.95 +5 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 39969.01953125 +err_fin 27353.34375 +sparsity check 0.39999985694885254 +time 1.34 +5 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 8938.1171875 +err_fin 6987.9599609375 +sparsity check 0.39999985694885254 +time 1.31 +5 self_attn.o_proj +Pruning ... +0.3990519344806671 0.09905195236206055 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3472740069065536 0.9709505944546686 1.0 +err_prefin 1242.98095703125 +err_fin 540.0116577148438 +sparsity check 0.3990519344806671 +time 67.85 +5 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 158335.46875 +err_fin 92801.5 +sparsity check 0.3999999931880406 +time 137.82 +5 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 154464.203125 +err_fin 89651.2734375 +sparsity check 0.3999999931880406 +time 138.07 +5 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3972.828857421875 +err_fin 3539.4873046875 +sparsity check 0.3999999931880406 +time 135.71 +6 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 103943.5625 +err_fin 62416.046875 +sparsity check 0.3999880701303482 +time 74.96 +6 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 43369.578125 +err_fin 31196.71875 +sparsity check 0.39999985694885254 +time 1.34 +6 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 10240.4765625 +err_fin 8313.4150390625 +sparsity check 0.39999985694885254 +time 1.30 +6 self_attn.o_proj +Pruning ... +0.3946770429611206 0.09467706084251404 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3331823778796275 0.9709505944546686 1.0 +err_prefin 2077.16015625 +err_fin 1079.2830810546875 +sparsity check 0.3946770429611206 +time 67.88 +6 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 210390.25 +err_fin 128369.984375 +sparsity check 0.3999999931880406 +time 137.79 +6 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 203740.015625 +err_fin 123401.203125 +sparsity check 0.3999999931880406 +time 138.22 +6 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5852.21826171875 +err_fin 5284.48291015625 +sparsity check 0.3999999931880406 +time 135.82 +7 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 172368.59375 +err_fin 109349.6171875 +sparsity check 0.3999880701303482 +time 74.94 +7 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 59500.1796875 +err_fin 45512.26953125 +sparsity check 0.39999985694885254 +time 1.35 +7 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 14638.193359375 +err_fin 12277.2529296875 +sparsity check 0.39999985694885254 +time 1.31 +7 self_attn.o_proj +Pruning ... +0.3913321793079376 0.09133219718933105 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3221918325783726 0.9709505944546686 1.0 +err_prefin 2677.06298828125 +err_fin 1313.254150390625 +sparsity check 0.3913321793079376 +time 67.90 +7 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 259888.875 +err_fin 164794.28125 +sparsity check 0.3999999931880406 +time 137.80 +7 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 251294.296875 +err_fin 158165.375 +sparsity check 0.3999999931880406 +time 138.07 +7 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 8274.5498046875 +err_fin 7470.8671875 +sparsity check 0.3999999931880406 +time 135.71 +8 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 151063.484375 +err_fin 98389.7734375 +sparsity check 0.3999880701303482 +time 74.97 +8 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 58661.2265625 +err_fin 43408.1328125 +sparsity check 0.39999985694885254 +time 1.32 +8 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 13763.2666015625 +err_fin 11586.0107421875 +sparsity check 0.39999985694885254 +time 1.30 +8 self_attn.o_proj +Pruning ... +0.39467422664165497 0.0946742445230484 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3331732040868158 0.9709505944546686 1.0 +err_prefin 4621.22314453125 +err_fin 2204.10498046875 +sparsity check 0.39467422664165497 +time 67.85 +8 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 357495.0 +err_fin 235801.171875 +sparsity check 0.3999999931880406 +time 137.81 +8 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 338663.09375 +err_fin 221967.1875 +sparsity check 0.3999999931880406 +time 138.20 +8 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 16046.419921875 +err_fin 14547.4453125 +sparsity check 0.3999999931880406 +time 135.80 +9 self_attn.q_proj +Pruning ... +0.39997535943984985 0.09997537732124329 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.350208414057257 0.9709505944546686 1.0 +err_prefin 165287.0625 +err_fin 115790.078125 +sparsity check 0.39997535943984985 +time 74.96 +9 self_attn.k_proj +Pruning ... +0.399999737739563 0.19999980926513672 0.3749997615814209 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446747913956842 0.9709505944546686 1.0 +err_prefin 57355.265625 +err_fin 46549.86328125 +sparsity check 0.399999737739563 +time 1.32 +9 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 20768.37109375 +err_fin 18328.796875 +sparsity check 0.39999985694885254 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.39449165761470795 0.09449167549610138 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.33257822431971 0.9709505944546686 1.0 +err_prefin 2547.7529296875 +err_fin 1121.1663818359375 +sparsity check 0.39449165761470795 +time 67.82 +9 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 455587.1875 +err_fin 311802.75 +sparsity check 0.3999999931880406 +time 137.78 +9 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 428959.15625 +err_fin 291601.25 +sparsity check 0.3999999931880406 +time 138.02 +9 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 14794.69921875 +err_fin 13578.59765625 +sparsity check 0.3999999931880406 +time 135.67 +10 self_attn.q_proj +Pruning ... +0.39997535943984985 0.09997537732124329 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.350208414057257 0.9709505944546686 1.0 +err_prefin 99369.4609375 +err_fin 70449.3203125 +sparsity check 0.39997535943984985 +time 74.94 +10 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 39679.23828125 +err_fin 30677.9765625 +sparsity check 0.39999985694885254 +time 1.35 +10 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 20211.69921875 +err_fin 17682.802734375 +sparsity check 0.39999985694885254 +time 1.30 +10 self_attn.o_proj +Pruning ... +0.38530606031417847 0.0853060781955719 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3018958142505257 0.9709505944546686 1.0 +err_prefin 1716.8173828125 +err_fin 816.5371704101562 +sparsity check 0.38530606031417847 +time 67.90 +10 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 548887.8125 +err_fin 392840.125 +sparsity check 0.3999999931880406 +time 137.79 +10 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 515091.375 +err_fin 366501.40625 +sparsity check 0.3999999931880406 +time 138.07 +10 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 16544.64453125 +err_fin 15337.5859375 +sparsity check 0.3999999931880406 +time 135.68 +11 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 159820.9375 +err_fin 113646.6875 +sparsity check 0.39998745918273926 +time 74.96 +11 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 64640.4375 +err_fin 49469.5078125 +sparsity check 0.39999985694885254 +time 1.34 +11 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 24035.24609375 +err_fin 20733.14453125 +sparsity check 0.39999985694885254 +time 1.32 +11 self_attn.o_proj +Pruning ... +0.38262051343917847 0.0826205313205719 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.292637027454332 0.9709505944546686 1.0 +err_prefin 2840.35986328125 +err_fin 1312.10595703125 +sparsity check 0.38262051343917847 +time 67.88 +11 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 589919.125 +err_fin 429187.125 +sparsity check 0.3999999931880406 +time 137.77 +11 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 556431.3125 +err_fin 402648.0 +sparsity check 0.3999999931880406 +time 138.06 +11 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 18161.0546875 +err_fin 16874.15234375 +sparsity check 0.3999999931880406 +time 135.65 +12 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 134002.015625 +err_fin 95821.796875 +sparsity check 0.39998745918273926 +time 74.95 +12 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 58485.625 +err_fin 44912.3515625 +sparsity check 0.39999985694885254 +time 1.32 +12 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 18154.234375 +err_fin 15332.16796875 +sparsity check 0.39999985694885254 +time 1.32 +12 self_attn.o_proj +Pruning ... +0.3908897191286087 0.09088973701000214 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3207235687633965 0.9709505944546686 1.0 +err_prefin 3292.9931640625 +err_fin 1473.5245361328125 +sparsity check 0.3908897191286087 +time 67.86 +12 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 583987.1875 +err_fin 426351.125 +sparsity check 0.3999999931880406 +time 137.79 +12 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 556522.8125 +err_fin 404676.78125 +sparsity check 0.3999999931880406 +time 138.09 +12 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 20344.2109375 +err_fin 18797.353515625 +sparsity check 0.3999999931880406 +time 135.66 +13 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 178159.578125 +err_fin 128332.265625 +sparsity check 0.39998745918273926 +time 74.93 +13 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 65993.390625 +err_fin 51519.92578125 +sparsity check 0.39999985694885254 +time 1.35 +13 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 24204.1953125 +err_fin 20978.1875 +sparsity check 0.39999985694885254 +time 1.32 +13 self_attn.o_proj +Pruning ... +0.3840336948633194 0.08403371274471283 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2975260219707532 0.9709505944546686 1.0 +err_prefin 8226.1689453125 +err_fin 4241.2490234375 +sparsity check 0.3840336948633194 +time 67.85 +13 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 621849.8125 +err_fin 446843.34375 +sparsity check 0.3999999931880406 +time 137.76 +13 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 598487.125 +err_fin 428082.15625 +sparsity check 0.3999999931880406 +time 138.06 +13 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 25953.232421875 +err_fin 23996.412109375 +sparsity check 0.3999999931880406 +time 135.68 +14 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 291831.1875 +err_fin 212318.5625 +sparsity check 0.39998745918273926 +time 74.96 +14 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 96605.0 +err_fin 77073.796875 +sparsity check 0.39999985694885254 +time 1.34 +14 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 34245.859375 +err_fin 30060.234375 +sparsity check 0.39999985694885254 +time 1.31 +14 self_attn.o_proj +Pruning ... +0.3914257138967514 0.09142573177814484 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.322501782332778 0.9709505944546686 1.0 +err_prefin 10807.0009765625 +err_fin 5231.7880859375 +sparsity check 0.3914257138967514 +time 67.84 +14 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 716520.875 +err_fin 529446.0 +sparsity check 0.3999999931880406 +time 137.80 +14 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 689797.25 +err_fin 507253.46875 +sparsity check 0.3999999931880406 +time 138.19 +14 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 27627.48046875 +err_fin 25726.07421875 +sparsity check 0.3999999931880406 +time 135.63 +15 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 214802.015625 +err_fin 160035.40625 +sparsity check 0.39998745918273926 +time 74.92 +15 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 72588.5859375 +err_fin 58471.12109375 +sparsity check 0.39999985694885254 +time 1.33 +15 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 40562.40625 +err_fin 36233.296875 +sparsity check 0.39999985694885254 +time 1.32 +15 self_attn.o_proj +Pruning ... +0.38133588433265686 0.08133590221405029 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2881597988885451 0.9709505944546686 1.0 +err_prefin 8441.4765625 +err_fin 4586.09619140625 +sparsity check 0.38133588433265686 +time 67.87 +15 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 772686.375 +err_fin 576230.125 +sparsity check 0.3999999931880406 +time 137.78 +15 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 746677.375 +err_fin 554146.5625 +sparsity check 0.3999999931880406 +time 138.06 +15 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 31399.396484375 +err_fin 29217.23828125 +sparsity check 0.3999999931880406 +time 135.68 +16 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 208473.703125 +err_fin 157294.0625 +sparsity check 0.39998745918273926 +time 74.95 +16 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 79562.640625 +err_fin 64505.265625 +sparsity check 0.39999985694885254 +time 1.34 +16 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 37636.8671875 +err_fin 33756.921875 +sparsity check 0.39999985694885254 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.3909340649843216 0.09093408286571503 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3208708803257494 0.9709505944546686 1.0 +err_prefin 8207.4296875 +err_fin 4186.65625 +sparsity check 0.3909340649843216 +time 67.84 +16 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 809634.875 +err_fin 610233.625 +sparsity check 0.3999999931880406 +time 137.77 +16 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 785285.875 +err_fin 589506.125 +sparsity check 0.3999999931880406 +time 138.07 +16 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 32459.40234375 +err_fin 30296.30078125 +sparsity check 0.3999999931880406 +time 135.68 +17 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 267344.5 +err_fin 199147.96875 +sparsity check 0.39998745918273926 +time 74.95 +17 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 95325.3203125 +err_fin 75927.9921875 +sparsity check 0.39999985694885254 +time 1.33 +17 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 33506.296875 +err_fin 29167.193359375 +sparsity check 0.39999985694885254 +time 1.32 +17 self_attn.o_proj +Pruning ... +0.39738740026950836 0.0973874181509018 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3419496463900018 0.9709505944546686 1.0 +err_prefin 13549.751953125 +err_fin 6180.005859375 +sparsity check 0.39738740026950836 +time 67.86 +17 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 804724.1875 +err_fin 598717.0625 +sparsity check 0.3999999931880406 +time 137.78 +17 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 782598.375 +err_fin 579610.25 +sparsity check 0.3999999931880406 +time 138.09 +17 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 40987.078125 +err_fin 37530.9296875 +sparsity check 0.3999999931880406 +time 135.67 +18 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 253699.828125 +err_fin 190201.734375 +sparsity check 0.39998745918273926 +time 74.94 +18 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 87223.015625 +err_fin 71011.21875 +sparsity check 0.39999985694885254 +time 1.34 +18 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 50740.96875 +err_fin 45821.36328125 +sparsity check 0.39999985694885254 +time 1.31 +18 self_attn.o_proj +Pruning ... +0.3879019021987915 0.08790192008018494 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3107185316560104 0.9709505944546686 1.0 +err_prefin 14054.927734375 +err_fin 6564.66162109375 +sparsity check 0.3879019021987915 +time 67.86 +18 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 884590.0625 +err_fin 660727.25 +sparsity check 0.3999999931880406 +time 137.76 +18 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 854321.8125 +err_fin 635184.0 +sparsity check 0.3999999931880406 +time 138.15 +18 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 45267.578125 +err_fin 41552.92578125 +sparsity check 0.3999999931880406 +time 135.60 +19 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 297510.625 +err_fin 226424.546875 +sparsity check 0.39998745918273926 +time 74.94 +19 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 93994.25 +err_fin 79128.078125 +sparsity check 0.39999985694885254 +time 1.31 +19 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 59418.61328125 +err_fin 54665.296875 +sparsity check 0.39999985694885254 +time 1.31 +19 self_attn.o_proj +Pruning ... +0.38000810146331787 0.0800081193447113 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.283498675942924 0.9709505944546686 1.0 +err_prefin 9839.859375 +err_fin 5089.5126953125 +sparsity check 0.38000810146331787 +time 67.90 +19 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 959164.4375 +err_fin 721920.1875 +sparsity check 0.3999999931880406 +time 137.78 +19 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 927413.125 +err_fin 695242.5 +sparsity check 0.3999999931880406 +time 137.98 +19 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 46537.91796875 +err_fin 43178.7578125 +sparsity check 0.3999999931880406 +time 135.66 +20 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 262611.65625 +err_fin 202144.71875 +sparsity check 0.39998745918273926 +time 74.95 +20 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 85072.703125 +err_fin 70396.765625 +sparsity check 0.39999985694885254 +time 1.35 +20 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 48525.015625 +err_fin 44793.02734375 +sparsity check 0.39999985694885254 +time 1.32 +20 self_attn.o_proj +Pruning ... +0.38832084834575653 0.08832086622714996 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3121309911892807 0.9709505944546686 1.0 +err_prefin 8732.67578125 +err_fin 3871.71484375 +sparsity check 0.38832084834575653 +time 67.87 +20 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1027752.75 +err_fin 777022.5 +sparsity check 0.39999318974358694 +time 137.78 +20 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 994249.375 +err_fin 748517.75 +sparsity check 0.39999318974358694 +time 138.02 +20 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 47753.5234375 +err_fin 44481.6484375 +sparsity check 0.3999999931880406 +time 135.76 +21 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 166584.8125 +err_fin 127685.875 +sparsity check 0.39998745918273926 +time 74.95 +21 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 60790.12890625 +err_fin 49373.1796875 +sparsity check 0.39999985694885254 +time 1.32 +21 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 42025.9140625 +err_fin 38236.7421875 +sparsity check 0.39999985694885254 +time 1.31 +21 self_attn.o_proj +Pruning ... +0.3720995783805847 0.07209959626197815 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2550014079986151 0.9709505944546686 1.0 +err_prefin 10183.21875 +err_fin 5230.98681640625 +sparsity check 0.3720995783805847 +time 67.90 +21 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1073376.125 +err_fin 813125.625 +sparsity check 0.39999318974358694 +time 137.77 +21 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1042929.4375 +err_fin 787203.875 +sparsity check 0.3999999931880406 +time 138.03 +21 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 49620.4609375 +err_fin 46504.046875 +sparsity check 0.3999999931880406 +time 135.70 +22 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 158965.96875 +err_fin 122928.96875 +sparsity check 0.39998745918273926 +time 75.08 +22 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 66577.109375 +err_fin 56351.9765625 +sparsity check 0.39999985694885254 +time 1.34 +22 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 47186.84375 +err_fin 43472.34375 +sparsity check 0.39999985694885254 +time 1.31 +22 self_attn.o_proj +Pruning ... +0.39435090124607086 0.0943509191274643 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.332119125472264 0.9709505944546686 1.0 +err_prefin 7203.67578125 +err_fin 3286.880859375 +sparsity check 0.39435090124607086 +time 67.96 +22 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1123434.375 +err_fin 853420.625 +sparsity check 0.39999318974358694 +time 137.99 +22 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1094003.25 +err_fin 828283.125 +sparsity check 0.39999318974358694 +time 138.56 +22 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 50516.125 +err_fin 47532.46875 +sparsity check 0.3999999931880406 +time 135.68 +23 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 229021.375 +err_fin 175342.1875 +sparsity check 0.39998745918273926 +time 75.01 +23 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91112.59375 +err_fin 75837.8359375 +sparsity check 0.39999985694885254 +time 1.33 +23 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 50323.359375 +err_fin 46203.8828125 +sparsity check 0.39999985694885254 +time 1.31 +23 self_attn.o_proj +Pruning ... +0.388177290558815 0.08817730844020844 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3116473469240155 0.9709505944546686 1.0 +err_prefin 10403.603515625 +err_fin 5000.8935546875 +sparsity check 0.388177290558815 +time 67.92 +23 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1150988.75 +err_fin 876287.0625 +sparsity check 0.39999318974358694 +time 138.11 +23 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1123824.5 +err_fin 852309.5625 +sparsity check 0.39999318974358694 +time 138.39 +23 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 52224.23828125 +err_fin 48965.41015625 +sparsity check 0.3999999931880406 +time 135.98 +24 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 203358.765625 +err_fin 153987.578125 +sparsity check 0.39998745918273926 +time 75.00 +24 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 80287.296875 +err_fin 65322.984375 +sparsity check 0.39999985694885254 +time 1.32 +24 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 38595.3125 +err_fin 34314.96484375 +sparsity check 0.39999985694885254 +time 1.31 +24 self_attn.o_proj +Pruning ... +0.3913518041372299 0.09135182201862335 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.322256876856434 0.9709505944546686 1.0 +err_prefin 9357.115234375 +err_fin 4763.25439453125 +sparsity check 0.3913518041372299 +time 67.88 +24 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1118270.0 +err_fin 854237.875 +sparsity check 0.3999999931880406 +time 138.05 +24 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1101718.75 +err_fin 839207.4375 +sparsity check 0.3999999931880406 +time 138.47 +24 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 52586.859375 +err_fin 49340.09765625 +sparsity check 0.3999999931880406 +time 135.86 +25 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 236197.78125 +err_fin 179507.046875 +sparsity check 0.39998745918273926 +time 75.00 +25 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91753.109375 +err_fin 74472.09375 +sparsity check 0.39999985694885254 +time 1.35 +25 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 40073.37109375 +err_fin 35583.90234375 +sparsity check 0.39999985694885254 +time 1.31 +25 self_attn.o_proj +Pruning ... +0.3883741497993469 0.08837416768074036 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3123104690436524 0.9709505944546686 1.0 +err_prefin 21641.259765625 +err_fin 11300.3251953125 +sparsity check 0.3883741497993469 +time 67.91 +25 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1072256.875 +err_fin 797672.1875 +sparsity check 0.3999999931880406 +time 138.07 +25 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1065880.375 +err_fin 789996.25 +sparsity check 0.3999999931880406 +time 138.31 +25 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 72162.203125 +err_fin 66710.90625 +sparsity check 0.3999999931880406 +time 135.91 +26 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 345840.21875 +err_fin 262721.625 +sparsity check 0.39998745918273926 +time 74.99 +26 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 117953.203125 +err_fin 98765.1875 +sparsity check 0.39999985694885254 +time 1.32 +26 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 61456.359375 +err_fin 55078.0625 +sparsity check 0.39999985694885254 +time 1.31 +26 self_attn.o_proj +Pruning ... +0.39200539886951447 0.0920054167509079 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3244193268192328 0.9709505944546686 1.0 +err_prefin 25394.7890625 +err_fin 13353.552734375 +sparsity check 0.39200539886951447 +time 67.88 +26 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1216229.375 +err_fin 914831.125 +sparsity check 0.39999318974358694 +time 138.07 +26 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1202893.0 +err_fin 901367.75 +sparsity check 0.39999318974358694 +time 138.37 +26 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 71759.484375 +err_fin 67622.875 +sparsity check 0.3999999931880406 +time 135.96 +27 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 157163.765625 +err_fin 121445.734375 +sparsity check 0.39998745918273926 +time 74.97 +27 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 68836.203125 +err_fin 57562.640625 +sparsity check 0.39999985694885254 +time 1.35 +27 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 50982.28515625 +err_fin 47113.44921875 +sparsity check 0.39999985694885254 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.39123478531837463 0.09123480319976807 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3218689321241066 0.9709505944546686 1.0 +err_prefin 10619.111328125 +err_fin 4833.744140625 +sparsity check 0.39123478531837463 +time 67.90 +27 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1286122.75 +err_fin 972871.5 +sparsity check 0.39999318974358694 +time 138.01 +27 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1268991.125 +err_fin 955722.75 +sparsity check 0.39999318974358694 +time 138.30 +27 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 75729.375 +err_fin 71600.2578125 +sparsity check 0.3999999931880406 +time 135.90 +28 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 285951.84375 +err_fin 220914.515625 +sparsity check 0.39998745918273926 +time 74.97 +28 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 106908.3359375 +err_fin 92089.265625 +sparsity check 0.39999985694885254 +time 1.33 +28 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 78111.703125 +err_fin 72002.25 +sparsity check 0.39999985694885254 +time 1.31 +28 self_attn.o_proj +Pruning ... +0.38880935311317444 0.08880937099456787 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3137739943616542 0.9709505944546686 1.0 +err_prefin 18052.15625 +err_fin 9518.7861328125 +sparsity check 0.38880935311317444 +time 67.91 +28 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1357190.625 +err_fin 1033252.6875 +sparsity check 0.39999318974358694 +time 137.84 +28 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1338597.375 +err_fin 1014937.4375 +sparsity check 0.39999318974358694 +time 138.04 +28 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 80249.2890625 +err_fin 76092.71875 +sparsity check 0.3999999931880406 +time 135.74 +29 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 283216.46875 +err_fin 219701.96875 +sparsity check 0.39998745918273926 +time 74.99 +29 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 100867.9296875 +err_fin 85445.09375 +sparsity check 0.39999985694885254 +time 1.32 +29 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 83404.8984375 +err_fin 77111.921875 +sparsity check 0.39999985694885254 +time 1.32 +29 self_attn.o_proj +Pruning ... +0.38364720344543457 0.083647221326828 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2961926723983255 0.9709505944546686 1.0 +err_prefin 18665.7265625 +err_fin 10149.21484375 +sparsity check 0.38364720344543457 +time 67.86 +29 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1436191.25 +err_fin 1099280.125 +sparsity check 0.39999318974358694 +time 137.85 +29 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1411887.5 +err_fin 1076554.625 +sparsity check 0.39999318974358694 +time 138.10 +29 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 84264.09375 +err_fin 80249.1875 +sparsity check 0.3999999931880406 +time 135.73 +30 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 147917.53125 +err_fin 115567.9921875 +sparsity check 0.39998745918273926 +time 74.99 +30 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 53050.2109375 +err_fin 45327.39453125 +sparsity check 0.39999985694885254 +time 1.34 +30 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 75653.828125 +err_fin 70534.90625 +sparsity check 0.39999985694885254 +time 1.32 +30 self_attn.o_proj +Pruning ... +0.3966815769672394 0.09668159484863281 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3396782334429505 0.9709505944546686 1.0 +err_prefin 9131.25390625 +err_fin 4687.3447265625 +sparsity check 0.3966815769672394 +time 67.88 +30 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1520840.0 +err_fin 1170527.75 +sparsity check 0.39999318974358694 +time 137.83 +30 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1489470.5 +err_fin 1141605.0 +sparsity check 0.39999318974358694 +time 138.03 +30 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 87122.390625 +err_fin 83041.5859375 +sparsity check 0.3999999931880406 +time 135.72 +31 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 300016.6875 +err_fin 233821.0625 +sparsity check 0.39998745918273926 +time 74.96 +31 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 101753.1328125 +err_fin 87469.875 +sparsity check 0.39999985694885254 +time 1.31 +31 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 83331.7890625 +err_fin 75658.828125 +sparsity check 0.39999985694885254 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.3851250559091568 0.08512507379055023 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3012760111818993 0.9709505944546686 1.0 +err_prefin 18780.552734375 +err_fin 10706.595703125 +sparsity check 0.3851250559091568 +time 67.86 +31 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1585097.625 +err_fin 1230850.0 +sparsity check 0.39999318974358694 +time 137.81 +31 mlp.up_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1549287.75 +err_fin 1198593.0 +sparsity check 0.3999858626297542 +time 138.09 +31 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 88640.484375 +err_fin 84655.578125 +sparsity check 0.3999999931880406 +time 135.74 +32 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 334009.03125 +err_fin 262031.40625 +sparsity check 0.39998745918273926 +time 74.97 +32 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 108645.09375 +err_fin 94478.96875 +sparsity check 0.39999985694885254 +time 1.33 +32 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 74391.625 +err_fin 69028.0625 +sparsity check 0.39999985694885254 +time 1.31 +32 self_attn.o_proj +Pruning ... +0.39311298727989197 0.0931130051612854 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3280670181258327 0.9709505944546686 1.0 +err_prefin 13726.9619140625 +err_fin 7054.3671875 +sparsity check 0.39311298727989197 +time 67.89 +32 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1625557.375 +err_fin 1258778.0 +sparsity check 0.3999999931880406 +time 137.82 +32 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1591531.75 +err_fin 1228276.25 +sparsity check 0.3999999931880406 +time 138.04 +32 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 93971.7265625 +err_fin 89482.7890625 +sparsity check 0.3999999931880406 +time 135.73 +33 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 475604.625 +err_fin 369777.0625 +sparsity check 0.39998745918273926 +time 74.97 +33 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 136738.765625 +err_fin 116955.046875 +sparsity check 0.39999985694885254 +time 1.33 +33 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 83920.1875 +err_fin 76452.140625 +sparsity check 0.39999985694885254 +time 1.31 +33 self_attn.o_proj +Pruning ... +0.3873485326766968 0.08734855055809021 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3088480261314723 0.9709505944546686 1.0 +err_prefin 28934.658203125 +err_fin 15025.1640625 +sparsity check 0.3873485326766968 +time 67.87 +33 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1646083.75 +err_fin 1256310.375 +sparsity check 0.3999999931880406 +time 137.81 +33 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1618582.0 +err_fin 1230377.625 +sparsity check 0.39999318974358694 +time 138.08 +33 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 107294.1640625 +err_fin 101588.65625 +sparsity check 0.3999999931880406 +time 135.70 +34 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 173598.65625 +err_fin 133135.75 +sparsity check 0.39998745918273926 +time 74.96 +34 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 72545.25 +err_fin 58444.1015625 +sparsity check 0.39999985694885254 +time 1.33 +34 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 50414.578125 +err_fin 45054.91015625 +sparsity check 0.39999985694885254 +time 1.32 +34 self_attn.o_proj +Pruning ... +0.390749529004097 0.09074954688549042 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.320257648229824 0.9709505944546686 1.0 +err_prefin 18316.734375 +err_fin 9629.263671875 +sparsity check 0.390749529004097 +time 67.87 +34 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1701501.0 +err_fin 1307156.5 +sparsity check 0.3999999931880406 +time 137.79 +34 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1672554.5 +err_fin 1279826.5 +sparsity check 0.3999999931880406 +time 138.04 +34 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 116514.109375 +err_fin 110948.59375 +sparsity check 0.3999999931880406 +time 135.70 +35 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 282674.09375 +err_fin 219742.546875 +sparsity check 0.39998745918273926 +time 74.96 +35 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91505.140625 +err_fin 75506.5625 +sparsity check 0.39999985694885254 +time 1.33 +35 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 68267.65625 +err_fin 62404.51953125 +sparsity check 0.39999985694885254 +time 1.32 +35 self_attn.o_proj +Pruning ... +0.37593840062618256 0.07593841850757599 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2689945890721086 0.9709505944546686 1.0 +err_prefin 21132.0 +err_fin 10591.478515625 +sparsity check 0.37593840062618256 +time 67.89 +35 mlp.gate_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1804963.75 +err_fin 1390331.75 +sparsity check 0.3999858626297542 +time 137.82 +35 mlp.up_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1771623.25 +err_fin 1359545.625 +sparsity check 0.3999858626297542 +time 138.07 +35 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 124995.6875 +err_fin 119226.5234375 +sparsity check 0.3999999931880406 +time 135.70 +36 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 270950.5 +err_fin 210602.453125 +sparsity check 0.39998745918273926 +time 74.96 +36 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 88066.09375 +err_fin 73971.8671875 +sparsity check 0.39999985694885254 +time 1.33 +36 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 66076.25 +err_fin 60804.046875 +sparsity check 0.39999985694885254 +time 1.31 +36 self_attn.o_proj +Pruning ... +0.38336338102817535 0.08336339890956879 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2952117289462595 0.9709505944546686 1.0 +err_prefin 14990.6181640625 +err_fin 7396.81787109375 +sparsity check 0.38336338102817535 +time 67.89 +36 mlp.gate_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1848811.25 +err_fin 1421905.375 +sparsity check 0.3999858626297542 +time 137.84 +36 mlp.up_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1815923.375 +err_fin 1391225.75 +sparsity check 0.3999858626297542 +time 138.00 +36 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 139403.25 +err_fin 132602.0 +sparsity check 0.3999999931880406 +time 135.73 +37 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 429569.09375 +err_fin 332502.1875 +sparsity check 0.39998745918273926 +time 74.97 +37 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 123268.078125 +err_fin 103549.9375 +sparsity check 0.39999985694885254 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 85408.6953125 +err_fin 78623.1171875 +sparsity check 0.39999985694885254 +time 1.30 +37 self_attn.o_proj +Pruning ... +0.3839271813631058 0.0839271992444992 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2971588429930576 0.9709505944546686 1.0 +err_prefin 31056.51171875 +err_fin 13986.1318359375 +sparsity check 0.3839271813631058 +time 67.89 +37 mlp.gate_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1934857.0 +err_fin 1477883.0 +sparsity check 0.3999858626297542 +time 137.82 +37 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 1900476.25 +err_fin 1446482.0 +sparsity check 0.39999266607420786 +time 138.10 +37 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 153010.03125 +err_fin 145332.546875 +sparsity check 0.3999999931880406 +time 135.72 +38 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 480920.25 +err_fin 367853.8125 +sparsity check 0.39998745918273926 +time 74.98 +38 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 137635.5 +err_fin 111749.28125 +sparsity check 0.39999985694885254 +time 1.33 +38 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91433.40625 +err_fin 82317.390625 +sparsity check 0.39999985694885254 +time 1.31 +38 self_attn.o_proj +Pruning ... +0.38491450250148773 0.08491452038288116 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3005542614358636 0.9709505944546686 1.0 +err_prefin 53930.984375 +err_fin 24837.0546875 +sparsity check 0.38491450250148773 +time 67.91 +38 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 1996669.0 +err_fin 1521184.0 +sparsity check 0.39999266607420786 +time 137.80 +38 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 1965801.75 +err_fin 1491614.75 +sparsity check 0.39999266607420786 +time 138.05 +38 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 172013.5625 +err_fin 163074.984375 +sparsity check 0.3999999931880406 +time 135.74 +39 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 588931.125 +err_fin 449892.03125 +sparsity check 0.39998745918273926 +time 74.97 +39 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 169708.15625 +err_fin 144543.78125 +sparsity check 0.39999985694885254 +time 1.32 +39 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 99360.171875 +err_fin 89540.0 +sparsity check 0.39999985694885254 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.39136700332164764 0.09136702120304108 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3223072482351075 0.9709505944546686 1.0 +err_prefin 62931.125 +err_fin 28539.208984375 +sparsity check 0.39136700332164764 +time 67.88 +39 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2023570.5 +err_fin 1523428.0 +sparsity check 0.3999999931880406 +time 137.80 +39 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1995750.25 +err_fin 1497013.0 +sparsity check 0.3999999931880406 +time 138.07 +39 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 209161.71875 +err_fin 196051.59375 +sparsity check 0.3999999931880406 +time 135.72 +40 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 548435.0 +err_fin 412508.59375 +sparsity check 0.3999999761581421 +time 74.96 +40 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 148578.1875 +err_fin 126080.5234375 +sparsity check 0.39999985694885254 +time 1.32 +40 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 112805.53125 +err_fin 100186.0625 +sparsity check 0.39999985694885254 +time 1.31 +40 self_attn.o_proj +Pruning ... +0.3920653164386749 0.09206533432006836 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3246171971444998 0.9709505944546686 1.0 +err_prefin 90355.03125 +err_fin 47657.5546875 +sparsity check 0.3920653164386749 +time 67.89 +40 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2109784.0 +err_fin 1561325.0 +sparsity check 0.39999266607420786 +time 137.79 +40 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2077427.0 +err_fin 1531031.5 +sparsity check 0.39999266607420786 +time 138.02 +40 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 237795.90625 +err_fin 223116.65625 +sparsity check 0.3999999931880406 +time 135.74 +41 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 454291.125 +err_fin 336475.4375 +sparsity check 0.3999999761581421 +time 74.95 +41 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 128970.75 +err_fin 107213.53125 +sparsity check 0.39999985694885254 +time 1.33 +41 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 98359.6640625 +err_fin 87798.2109375 +sparsity check 0.39999985694885254 +time 1.32 +41 self_attn.o_proj +Pruning ... +0.38767126202583313 0.08767127990722656 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.30993959080064 0.9709505944546686 1.0 +err_prefin 88793.8515625 +err_fin 42478.0703125 +sparsity check 0.38767126202583313 +time 67.88 +41 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2213341.0 +err_fin 1599059.625 +sparsity check 0.39999266607420786 +time 137.77 +41 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2137321.75 +err_fin 1535685.0 +sparsity check 0.39999266607420786 +time 138.05 +41 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 277058.8125 +err_fin 257838.4375 +sparsity check 0.3999999931880406 +time 135.72 +42 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 519707.0625 +err_fin 379932.5625 +sparsity check 0.3999999761581421 +time 74.97 +42 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 132640.8125 +err_fin 109983.7734375 +sparsity check 0.39999985694885254 +time 1.33 +42 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 120522.5546875 +err_fin 108689.6484375 +sparsity check 0.39999985694885254 +time 1.30 +42 self_attn.o_proj +Pruning ... +0.38890962302684784 0.08890964090824127 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3141107083100954 0.9709505944546686 1.0 +err_prefin 78469.5859375 +err_fin 39123.4453125 +sparsity check 0.38890962302684784 +time 67.87 +42 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2396299.5 +err_fin 1715538.75 +sparsity check 0.39999266607420786 +time 137.78 +42 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2258508.0 +err_fin 1608224.0 +sparsity check 0.39999266607420786 +time 138.04 +42 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 300451.25 +err_fin 279095.375 +sparsity check 0.3999999931880406 +time 135.72 +43 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 361255.3125 +err_fin 262990.71875 +sparsity check 0.3999999761581421 +time 74.96 +43 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 117103.0234375 +err_fin 96359.265625 +sparsity check 0.39999985694885254 +time 1.34 +43 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 96491.140625 +err_fin 84371.9375 +sparsity check 0.39999985694885254 +time 1.30 +43 self_attn.o_proj +Pruning ... +0.38483870029449463 0.08483871817588806 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3002942199017686 0.9709505944546686 1.0 +err_prefin 58077.65625 +err_fin 31061.318359375 +sparsity check 0.38483870029449463 +time 67.84 +43 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2494903.0 +err_fin 1775033.75 +sparsity check 0.3999999931880406 +time 137.80 +43 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2325850.5 +err_fin 1646176.75 +sparsity check 0.3999999931880406 +time 137.95 +43 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 330748.3125 +err_fin 306762.3125 +sparsity check 0.3999999931880406 +time 135.70 +44 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 630344.125 +err_fin 458141.21875 +sparsity check 0.3999999761581421 +time 74.96 +44 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 131598.953125 +err_fin 111503.53125 +sparsity check 0.39999985694885254 +time 1.32 +44 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 148579.09375 +err_fin 134851.46875 +sparsity check 0.39999985694885254 +time 1.33 +44 self_attn.o_proj +Pruning ... +0.39769303798675537 0.0976930558681488 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3429306804428647 0.9709505944546686 1.0 +err_prefin 156206.703125 +err_fin 83971.65625 +sparsity check 0.39769303798675537 +time 67.85 +44 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2738918.75 +err_fin 1918952.75 +sparsity check 0.39999266607420786 +time 137.78 +44 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2463815.5 +err_fin 1714715.75 +sparsity check 0.39999266607420786 +time 138.15 +44 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 383154.3125 +err_fin 350069.21875 +sparsity check 0.3999999931880406 +time 135.36 +45 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 411766.8125 +err_fin 293813.1875 +sparsity check 0.39998745918273926 +time 74.93 +45 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 122174.71875 +err_fin 101486.4609375 +sparsity check 0.39999985694885254 +time 1.35 +45 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 137134.6875 +err_fin 124872.6953125 +sparsity check 0.39999985694885254 +time 1.32 +45 self_attn.o_proj +Pruning ... +0.3968270719051361 0.09682708978652954 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.340147122923055 0.9709505944546686 1.0 +err_prefin 41803.109375 +err_fin 22890.818359375 +sparsity check 0.3968270719051361 +time 67.80 +45 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2962994.5 +err_fin 2079594.0 +sparsity check 0.39999266607420786 +time 137.80 +45 mlp.up_proj +Pruning ... +0.39998556886400494 0.1999494880437851 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337628919369338 0.9709505944546686 1.0 +err_prefin 2630788.25 +err_fin 1833110.0 +sparsity check 0.39998556886400494 +time 137.97 +45 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 393639.1875 +err_fin 360773.9375 +sparsity check 0.3999999931880406 +time 135.69 +46 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 343920.46875 +err_fin 246327.15625 +sparsity check 0.39998745918273926 +time 74.95 +46 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 98745.546875 +err_fin 84167.328125 +sparsity check 0.39999985694885254 +time 1.35 +46 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 123088.5 +err_fin 112754.875 +sparsity check 0.39999985694885254 +time 1.31 +46 self_attn.o_proj +Pruning ... +0.3924602270126343 0.09246024489402771 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3259197918896706 0.9709505944546686 1.0 +err_prefin 57249.734375 +err_fin 31896.0 +sparsity check 0.3924602270126343 +time 67.91 +46 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3174013.75 +err_fin 2242079.0 +sparsity check 0.3999999931880406 +time 137.79 +46 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2772648.5 +err_fin 1945844.125 +sparsity check 0.39999266607420786 +time 137.99 +46 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 395180.625 +err_fin 364649.0625 +sparsity check 0.3999999931880406 +time 135.70 +47 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 468856.0625 +err_fin 337035.125 +sparsity check 0.3999999761581421 +time 74.95 +47 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 138887.5625 +err_fin 116853.6171875 +sparsity check 0.39999985694885254 +time 1.34 +47 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 114607.671875 +err_fin 104111.078125 +sparsity check 0.39999985694885254 +time 1.30 +47 self_attn.o_proj +Pruning ... +0.39365212619304657 0.09365214407444 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3298350005144441 0.9709505944546686 1.0 +err_prefin 96371.875 +err_fin 42798.21875 +sparsity check 0.39365212619304657 +time 67.85 +47 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3290644.75 +err_fin 2296471.5 +sparsity check 0.3999999931880406 +time 137.79 +47 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2834067.5 +err_fin 1964680.125 +sparsity check 0.3999999931880406 +time 137.97 +47 mlp.down_proj +Pruning ... +0.399999988930566 0.19999997317790985 0.34285713945116314 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917398757333 0.9709505944546686 1.0 +err_prefin 443383.6875 +err_fin 404481.4375 +sparsity check 0.399999988930566 +time 135.68 +48 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 238758.171875 +err_fin 169874.4375 +sparsity check 0.39998745918273926 +time 74.96 +48 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 67367.6953125 +err_fin 56036.6953125 +sparsity check 0.39999985694885254 +time 1.34 +48 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 115441.171875 +err_fin 105516.4609375 +sparsity check 0.39999985694885254 +time 1.31 +48 self_attn.o_proj +Pruning ... +0.3911074250936508 0.09110744297504425 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.32144643241679 0.9709505944546686 1.0 +err_prefin 56382.0859375 +err_fin 30507.6953125 +sparsity check 0.3911074250936508 +time 67.86 +48 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3435194.75 +err_fin 2387344.75 +sparsity check 0.3999999931880406 +time 137.81 +48 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2945057.5 +err_fin 2032755.5 +sparsity check 0.3999999931880406 +time 138.02 +48 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 443090.375 +err_fin 406116.25 +sparsity check 0.3999999931880406 +time 135.35 +49 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 139943.1875 +err_fin 99371.890625 +sparsity check 0.39998745918273926 +time 74.97 +49 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 54727.9921875 +err_fin 45486.75390625 +sparsity check 0.39999985694885254 +time 1.33 +49 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 92321.65625 +err_fin 82783.8984375 +sparsity check 0.39999985694885254 +time 1.32 +49 self_attn.o_proj +Pruning ... +0.3994888663291931 0.09948888421058655 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3486641752370372 0.9709505944546686 1.0 +err_prefin 24136.89453125 +err_fin 11916.1640625 +sparsity check 0.3994888663291931 +time 67.85 +49 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3564298.75 +err_fin 2477942.5 +sparsity check 0.3999999931880406 +time 137.82 +49 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3051650.25 +err_fin 2108091.5 +sparsity check 0.3999999931880406 +time 137.99 +49 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 444759.9375 +err_fin 409438.4375 +sparsity check 0.3999999931880406 +time 135.74 +50 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 210704.421875 +err_fin 149945.28125 +sparsity check 0.39998745918273926 +time 74.96 +50 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 60009.5078125 +err_fin 50233.8203125 +sparsity check 0.39999985694885254 +time 1.33 +50 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 118265.875 +err_fin 106615.8359375 +sparsity check 0.39999985694885254 +time 1.32 +50 self_attn.o_proj +Pruning ... +0.3945927917957306 0.09459280967712402 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.332907882956055 0.9709505944546686 1.0 +err_prefin 41071.29296875 +err_fin 22746.974609375 +sparsity check 0.3945927917957306 +time 67.86 +50 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3691318.5 +err_fin 2572185.5 +sparsity check 0.3999999931880406 +time 137.79 +50 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3151099.25 +err_fin 2181640.75 +sparsity check 0.3999999931880406 +time 138.17 +50 mlp.down_proj +Pruning ... +0.399999988930566 0.19999997317790985 0.34285713945116314 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917398757333 0.9709505944546686 1.0 +err_prefin 447227.28125 +err_fin 412591.90625 +sparsity check 0.399999988930566 +time 135.35 +51 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 271350.0625 +err_fin 193069.9375 +sparsity check 0.39998745918273926 +time 74.97 +51 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 82881.09375 +err_fin 70391.65625 +sparsity check 0.39999985694885254 +time 1.34 +51 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 124578.8125 +err_fin 112905.328125 +sparsity check 0.39999985694885254 +time 1.33 +51 self_attn.o_proj +Pruning ... +0.3849567323923111 0.08495675027370453 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.300699085947819 0.9709505944546686 1.0 +err_prefin 42985.53515625 +err_fin 21869.13671875 +sparsity check 0.3849567323923111 +time 67.89 +51 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3787472.5 +err_fin 2648911.75 +sparsity check 0.3999999931880406 +time 137.81 +51 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3242460.0 +err_fin 2253813.0 +sparsity check 0.3999999931880406 +time 138.05 +51 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 449872.78125 +err_fin 415985.8125 +sparsity check 0.3999999931880406 +time 135.71 +52 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 439324.875 +err_fin 315666.40625 +sparsity check 0.39998745918273926 +time 74.97 +52 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 95802.5 +err_fin 82492.7421875 +sparsity check 0.39999985694885254 +time 1.33 +52 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 146872.84375 +err_fin 133930.578125 +sparsity check 0.39999985694885254 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.38917115330696106 0.08917117118835449 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3149881046089533 0.9709505944546686 1.0 +err_prefin 87225.6015625 +err_fin 43163.53125 +sparsity check 0.38917115330696106 +time 67.88 +52 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3871781.0 +err_fin 2721833.75 +sparsity check 0.3999999931880406 +time 137.91 +52 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3309534.75 +err_fin 2312042.0 +sparsity check 0.3999999931880406 +time 138.23 +52 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 457674.1875 +err_fin 423815.9375 +sparsity check 0.3999999931880406 +time 135.43 +53 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 147558.15625 +err_fin 106016.625 +sparsity check 0.39998745918273926 +time 74.97 +53 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 43785.1484375 +err_fin 36865.515625 +sparsity check 0.39999985694885254 +time 1.35 +53 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 115249.484375 +err_fin 105056.921875 +sparsity check 0.39999985694885254 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.39926575124263763 0.09926576912403107 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3479546838848155 0.9709505944546686 1.0 +err_prefin 30559.916015625 +err_fin 15811.552734375 +sparsity check 0.39926575124263763 +time 67.89 +53 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3969673.25 +err_fin 2786505.5 +sparsity check 0.3999999931880406 +time 137.94 +53 mlp.up_proj +Pruning ... +0.3999928959778377 0.1999751329421997 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337775486548314 0.9709505944546686 1.0 +err_prefin 3400897.5 +err_fin 2372340.5 +sparsity check 0.3999928959778377 +time 138.10 +53 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 463003.625 +err_fin 429105.5625 +sparsity check 0.3999999931880406 +time 135.72 +54 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 173454.875 +err_fin 124175.578125 +sparsity check 0.39998745918273926 +time 74.98 +54 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 49409.3046875 +err_fin 41089.84375 +sparsity check 0.39999985694885254 +time 1.36 +54 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 118842.3828125 +err_fin 108371.765625 +sparsity check 0.39999985694885254 +time 1.32 +54 self_attn.o_proj +Pruning ... +0.39291320741176605 0.09291322529315948 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3274106262053968 0.9709505944546686 1.0 +err_prefin 36517.9765625 +err_fin 18781.7265625 +sparsity check 0.39291320741176605 +time 67.88 +54 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4057488.0 +err_fin 2857511.75 +sparsity check 0.3999999931880406 +time 137.92 +54 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3488466.5 +err_fin 2440393.25 +sparsity check 0.3999999931880406 +time 138.12 +54 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 463141.15625 +err_fin 430254.9375 +sparsity check 0.3999999931880406 +time 135.75 +55 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 276099.8125 +err_fin 199511.765625 +sparsity check 0.39998745918273926 +time 74.98 +55 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 79841.265625 +err_fin 67909.1484375 +sparsity check 0.39999985694885254 +time 1.34 +55 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 154509.328125 +err_fin 141143.03125 +sparsity check 0.39999985694885254 +time 1.30 +55 self_attn.o_proj +Pruning ... +0.3909609913825989 0.09096100926399231 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.32096030981266 0.9709505944546686 1.0 +err_prefin 64654.1015625 +err_fin 33504.8125 +sparsity check 0.3909609913825989 +time 67.88 +55 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4126911.25 +err_fin 2913544.0 +sparsity check 0.3999999931880406 +time 137.96 +55 mlp.up_proj +Pruning ... +0.3999931343964168 0.19997596740722656 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337780255445538 0.9709505944546686 1.0 +err_prefin 3576076.0 +err_fin 2509234.0 +sparsity check 0.3999931343964168 +time 138.18 +55 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 473575.40625 +err_fin 441082.65625 +sparsity check 0.3999999931880406 +time 135.51 +56 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 201423.84375 +err_fin 145764.46875 +sparsity check 0.39998745918273926 +time 74.97 +56 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 64095.1484375 +err_fin 53721.40625 +sparsity check 0.39999985694885254 +time 1.34 +56 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 108391.2578125 +err_fin 98795.03125 +sparsity check 0.39999985694885254 +time 1.31 +56 self_attn.o_proj +Pruning ... +0.394983634352684 0.09498365223407745 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3341802617171317 0.9709505944546686 1.0 +err_prefin 35854.6640625 +err_fin 17304.00390625 +sparsity check 0.394983634352684 +time 67.91 +56 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4181935.75 +err_fin 2950946.75 +sparsity check 0.3999999931880406 +time 137.85 +56 mlp.up_proj +Pruning ... +0.3999931343964168 0.19997596740722656 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337780255445538 0.9709505944546686 1.0 +err_prefin 3632633.0 +err_fin 2548105.5 +sparsity check 0.3999931343964168 +time 138.07 +56 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 479367.90625 +err_fin 446711.625 +sparsity check 0.3999999931880406 +time 135.40 +57 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 193574.265625 +err_fin 139194.71875 +sparsity check 0.39998745918273926 +time 74.96 +57 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 59586.9765625 +err_fin 50896.7578125 +sparsity check 0.39999985694885254 +time 1.34 +57 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 140319.21875 +err_fin 128478.203125 +sparsity check 0.39999985694885254 +time 1.31 +57 self_attn.o_proj +Pruning ... +0.3990766406059265 0.09907665848731995 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3473526955921247 0.9709505944546686 1.0 +err_prefin 37232.3046875 +err_fin 19707.89453125 +sparsity check 0.3990766406059265 +time 67.86 +57 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4315325.0 +err_fin 3052128.5 +sparsity check 0.3999999931880406 +time 137.92 +57 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3757622.5 +err_fin 2641883.0 +sparsity check 0.3999999931880406 +time 138.13 +57 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 488616.46875 +err_fin 456110.0625 +sparsity check 0.3999999931880406 +time 135.46 +58 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 109906.765625 +err_fin 79257.90625 +sparsity check 0.3999880701303482 +time 74.97 +58 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 36524.1015625 +err_fin 29866.85546875 +sparsity check 0.39999985694885254 +time 1.36 +58 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 101192.5234375 +err_fin 91440.578125 +sparsity check 0.39999985694885254 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.3996235728263855 0.09962359070777893 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.349092144873774 0.9709505944546686 1.0 +err_prefin 27671.78125 +err_fin 14613.060546875 +sparsity check 0.3996235728263855 +time 67.84 +58 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4399088.0 +err_fin 3121391.5 +sparsity check 0.3999999931880406 +time 137.91 +58 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3839110.5 +err_fin 2708475.5 +sparsity check 0.3999999931880406 +time 138.10 +58 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 492001.0 +err_fin 459838.25 +sparsity check 0.3999999931880406 +time 135.44 +59 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 140025.5 +err_fin 100983.1171875 +sparsity check 0.39998745918273926 +time 74.99 +59 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 42919.84375 +err_fin 36084.875 +sparsity check 0.39999985694885254 +time 1.35 +59 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 112994.859375 +err_fin 101844.03125 +sparsity check 0.39999985694885254 +time 1.33 +59 self_attn.o_proj +Pruning ... +0.3997947573661804 0.09979477524757385 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3496355861145926 0.9709505944546686 1.0 +err_prefin 29219.421875 +err_fin 14662.58984375 +sparsity check 0.3997947573661804 +time 67.83 +59 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4492010.0 +err_fin 3196863.75 +sparsity check 0.3999999931880406 +time 137.86 +59 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3931143.25 +err_fin 2782485.0 +sparsity check 0.3999999931880406 +time 138.13 +59 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 492903.5625 +err_fin 462014.5 +sparsity check 0.3999999931880406 +time 135.44 +60 self_attn.q_proj +Pruning ... +0.39995187520980835 0.09995189309120178 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.350133957117459 0.9709505944546686 1.0 +err_prefin 24846.77734375 +err_fin 17901.5859375 +sparsity check 0.39995187520980835 +time 74.97 +60 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 9290.47265625 +err_fin 7612.259765625 +sparsity check 0.39999985694885254 +time 1.33 +60 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 59974.8984375 +err_fin 54110.11328125 +sparsity check 0.39999985694885254 +time 1.31 +60 self_attn.o_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 17134.6015625 +err_fin 9210.818359375 +sparsity check 0.3999999761581421 +time 67.82 +60 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4548430.0 +err_fin 3252469.75 +sparsity check 0.3999999931880406 +time 137.90 +60 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4000529.25 +err_fin 2845059.5 +sparsity check 0.3999999931880406 +time 138.32 +60 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 489890.625 +err_fin 460278.625 +sparsity check 0.3999999931880406 +time 135.45 +61 self_attn.q_proj +Pruning ... +0.39996403455734253 0.09996405243873596 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3501725095304327 0.9709505944546686 1.0 +err_prefin 90896.8515625 +err_fin 66454.25 +sparsity check 0.39996403455734253 +time 74.98 +61 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 30812.04296875 +err_fin 25593.125 +sparsity check 0.39999985694885254 +time 1.33 +61 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 102036.171875 +err_fin 93325.7578125 +sparsity check 0.39999985694885254 +time 1.31 +61 self_attn.o_proj +Pruning ... +0.39953969419002533 0.09953971207141876 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.348825692335962 0.9709505944546686 1.0 +err_prefin 25333.818359375 +err_fin 13513.7431640625 +sparsity check 0.39953969419002533 +time 67.81 +61 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4626365.0 +err_fin 3314594.0 +sparsity check 0.3999999931880406 +time 137.87 +61 mlp.up_proj +Pruning ... +0.3999931343964168 0.19997596740722656 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337780255445538 0.9709505944546686 1.0 +err_prefin 4089394.75 +err_fin 2914332.5 +sparsity check 0.3999931343964168 +time 138.09 +61 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 492596.8125 +err_fin 463564.75 +sparsity check 0.3999999931880406 +time 135.76 +62 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 99086.1953125 +err_fin 72139.265625 +sparsity check 0.3999880701303482 +time 74.97 +62 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 34142.8046875 +err_fin 28503.0 +sparsity check 0.399999737739563 +time 1.34 +62 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 112070.03125 +err_fin 101395.1171875 +sparsity check 0.39999985694885254 +time 1.31 +62 self_attn.o_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 23855.984375 +err_fin 11415.373046875 +sparsity check 0.3999999761581421 +time 67.82 +62 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4720747.0 +err_fin 3396094.0 +sparsity check 0.3999999931880406 +time 137.92 +62 mlp.up_proj +Pruning ... +0.3999931343964168 0.19997596740722656 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337780255445538 0.9709505944546686 1.0 +err_prefin 4193333.75 +err_fin 3000677.5 +sparsity check 0.3999931343964168 +time 138.15 +62 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 498259.875 +err_fin 469256.65625 +sparsity check 0.3999999931880406 +time 135.88 +63 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 85960.1640625 +err_fin 63232.09765625 +sparsity check 0.39998745918273926 +time 74.95 +63 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 26819.83203125 +err_fin 22145.341796875 +sparsity check 0.39999985694885254 +time 1.36 +63 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 89955.7734375 +err_fin 80309.4609375 +sparsity check 0.39999985694885254 +time 1.32 +63 self_attn.o_proj +Pruning ... +0.3984784632921219 0.09847848117351532 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.345444731275777 0.9709505944546686 1.0 +err_prefin 17989.068359375 +err_fin 9419.4921875 +sparsity check 0.3984784632921219 +time 67.82 +63 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4814154.0 +err_fin 3475047.5 +sparsity check 0.3999999931880406 +time 137.90 +63 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4284651.5 +err_fin 3079046.75 +sparsity check 0.3999999931880406 +time 138.06 +63 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 504413.34375 +err_fin 475615.25 +sparsity check 0.3999999931880406 +time 135.35 +64 self_attn.q_proj +Pruning ... +0.3999755531549454 0.0999755710363388 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502090281957484 0.9709505944546686 1.0 +err_prefin 159849.96875 +err_fin 117469.234375 +sparsity check 0.3999755531549454 +time 74.95 +64 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 51413.63671875 +err_fin 42604.359375 +sparsity check 0.39999985694885254 +time 1.35 +64 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 124866.046875 +err_fin 114163.296875 +sparsity check 0.39999985694885254 +time 1.32 +64 self_attn.o_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 35489.97265625 +err_fin 16909.37109375 +sparsity check 0.3999999761581421 +time 67.82 +64 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4902354.0 +err_fin 3547474.5 +sparsity check 0.3999999931880406 +time 137.84 +64 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4380570.5 +err_fin 3154890.5 +sparsity check 0.3999999931880406 +time 138.05 +64 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 512836.90625 +err_fin 483761.8125 +sparsity check 0.3999999931880406 +time 135.35 +65 self_attn.q_proj +Pruning ... +0.39996445178985596 0.09996446967124939 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.350173832365276 0.9709505944546686 1.0 +err_prefin 45725.81640625 +err_fin 33557.125 +sparsity check 0.39996445178985596 +time 74.94 +65 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 15090.453125 +err_fin 12062.2275390625 +sparsity check 0.39999985694885254 +time 1.33 +65 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 69568.328125 +err_fin 62171.89453125 +sparsity check 0.39999985694885254 +time 1.31 +65 self_attn.o_proj +Pruning ... +0.3986722081899643 0.09867222607135773 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3460633409565008 0.9709505944546686 1.0 +err_prefin 16426.189453125 +err_fin 8142.294921875 +sparsity check 0.3986722081899643 +time 67.88 +65 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4979698.5 +err_fin 3616654.5 +sparsity check 0.3999999931880406 +time 137.92 +65 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4469581.0 +err_fin 3233134.0 +sparsity check 0.3999999931880406 +time 138.15 +65 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 521956.0 +err_fin 492626.28125 +sparsity check 0.3999999931880406 +time 135.46 +66 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 82390.8046875 +err_fin 60246.546875 +sparsity check 0.3999880701303482 +time 74.96 +66 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 27396.1171875 +err_fin 22218.01171875 +sparsity check 0.39999985694885254 +time 1.32 +66 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91186.625 +err_fin 83134.8125 +sparsity check 0.39999985694885254 +time 1.30 +66 self_attn.o_proj +Pruning ... +0.39990170300006866 0.0999017208814621 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3499748560894689 0.9709505944546686 1.0 +err_prefin 27952.85546875 +err_fin 15630.4541015625 +sparsity check 0.39990170300006866 +time 67.81 +66 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5090460.0 +err_fin 3697180.5 +sparsity check 0.3999999931880406 +time 137.88 +66 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4601931.0 +err_fin 3329092.75 +sparsity check 0.3999999931880406 +time 138.24 +66 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 545843.5 +err_fin 515164.53125 +sparsity check 0.3999999931880406 +time 135.36 +67 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 35176.55078125 +err_fin 25813.625 +sparsity check 0.3999880701303482 +time 74.97 +67 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 11708.529296875 +err_fin 8978.494140625 +sparsity check 0.39999985694885254 +time 1.35 +67 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 38922.94140625 +err_fin 33378.15625 +sparsity check 0.39999985694885254 +time 1.31 +67 self_attn.o_proj +Pruning ... +0.3936278820037842 0.09362789988517761 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3297556035346028 0.9709505944546686 1.0 +err_prefin 9967.0458984375 +err_fin 4096.6826171875 +sparsity check 0.3936278820037842 +time 67.86 +67 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5132012.5 +err_fin 3736780.0 +sparsity check 0.3999999931880406 +time 137.93 +67 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4676647.0 +err_fin 3389882.0 +sparsity check 0.3999999931880406 +time 138.14 +67 mlp.down_proj +Pruning ... +0.399999988930566 0.19999997317790985 0.34285713945116314 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917398757333 0.9709505944546686 1.0 +err_prefin 549749.75 +err_fin 518622.03125 +sparsity check 0.399999988930566 +time 135.45 +68 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 292117.0 +err_fin 215035.8125 +sparsity check 0.39998745918273926 +time 74.96 +68 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 88142.25 +err_fin 74237.4296875 +sparsity check 0.39999985694885254 +time 1.35 +68 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 180795.3125 +err_fin 166177.03125 +sparsity check 0.39999985694885254 +time 1.31 +68 self_attn.o_proj +Pruning ... +0.39890041947364807 0.0989004373550415 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.346791217688506 0.9709505944546686 1.0 +err_prefin 28715.28125 +err_fin 15530.28125 +sparsity check 0.39890041947364807 +time 67.83 +68 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5310609.5 +err_fin 3873440.0 +sparsity check 0.3999999931880406 +time 137.78 +68 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4870220.0 +err_fin 3537587.5 +sparsity check 0.3999999931880406 +time 138.03 +68 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 587633.875 +err_fin 553468.3125 +sparsity check 0.3999999931880406 +time 135.36 +69 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 483384.78125 +err_fin 357346.09375 +sparsity check 0.39998745918273926 +time 74.96 +69 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 158684.90625 +err_fin 134880.53125 +sparsity check 0.39999985694885254 +time 1.34 +69 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 241631.8125 +err_fin 220124.5625 +sparsity check 0.39999985694885254 +time 1.32 +69 self_attn.o_proj +Pruning ... +0.3949231803417206 0.09492319822311401 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3339836229488613 0.9709505944546686 1.0 +err_prefin 54212.2265625 +err_fin 26918.04296875 +sparsity check 0.3949231803417206 +time 67.85 +69 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5407849.0 +err_fin 3944897.0 +sparsity check 0.3999999931880406 +time 137.88 +69 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5016751.0 +err_fin 3643644.5 +sparsity check 0.3999999931880406 +time 138.15 +69 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 617275.625 +err_fin 581917.125 +sparsity check 0.3999999931880406 +time 135.40 +70 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 282138.375 +err_fin 207488.1875 +sparsity check 0.39998745918273926 +time 74.96 +70 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 89395.515625 +err_fin 73210.3203125 +sparsity check 0.39999985694885254 +time 1.33 +70 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 143147.21875 +err_fin 129311.0 +sparsity check 0.39999985694885254 +time 1.31 +70 self_attn.o_proj +Pruning ... +0.39024436473846436 0.09024438261985779 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3185758861734032 0.9709505944546686 1.0 +err_prefin 43651.9140625 +err_fin 20341.4765625 +sparsity check 0.39024436473846436 +time 67.87 +70 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5526235.0 +err_fin 4027965.75 +sparsity check 0.3999999931880406 +time 137.82 +70 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5155715.5 +err_fin 3744852.0 +sparsity check 0.3999999931880406 +time 138.02 +70 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 659116.25 +err_fin 620709.125 +sparsity check 0.3999999931880406 +time 135.35 +71 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 368707.46875 +err_fin 273084.25 +sparsity check 0.39998745918273926 +time 74.94 +71 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 119726.71875 +err_fin 99347.046875 +sparsity check 0.39999985694885254 +time 1.32 +71 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 149877.203125 +err_fin 136371.9375 +sparsity check 0.39999985694885254 +time 1.32 +71 self_attn.o_proj +Pruning ... +0.3951091170310974 0.09510913491249084 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3345882234903663 0.9709505944546686 1.0 +err_prefin 56630.87890625 +err_fin 26684.126953125 +sparsity check 0.3951091170310974 +time 67.87 +71 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5691556.5 +err_fin 4140972.5 +sparsity check 0.3999999931880406 +time 137.80 +71 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5325007.0 +err_fin 3861668.25 +sparsity check 0.3999999931880406 +time 138.01 +71 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 695043.375 +err_fin 653343.375 +sparsity check 0.3999999931880406 +time 135.77 +72 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 509801.3125 +err_fin 376212.1875 +sparsity check 0.39998745918273926 +time 74.95 +72 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 150016.125 +err_fin 128747.53125 +sparsity check 0.39999985694885254 +time 1.34 +72 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 172384.1875 +err_fin 157458.328125 +sparsity check 0.39999985694885254 +time 1.32 +72 self_attn.o_proj +Pruning ... +0.39744168519973755 0.09744170308113098 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3421240016705787 0.9709505944546686 1.0 +err_prefin 70598.375 +err_fin 38307.6796875 +sparsity check 0.39744168519973755 +time 67.79 +72 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5828300.0 +err_fin 4228134.5 +sparsity check 0.3999999931880406 +time 137.79 +72 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5491108.0 +err_fin 3969415.5 +sparsity check 0.3999999931880406 +time 138.18 +72 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 746648.0625 +err_fin 700877.0 +sparsity check 0.3999999931880406 +time 135.37 +73 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 477676.0625 +err_fin 350389.125 +sparsity check 0.39998745918273926 +time 74.97 +73 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 143966.09375 +err_fin 122261.875 +sparsity check 0.39999985694885254 +time 1.35 +73 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 200310.625 +err_fin 179147.703125 +sparsity check 0.39999985694885254 +time 1.32 +73 self_attn.o_proj +Pruning ... +0.3994404375553131 0.09944045543670654 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3485102430804012 0.9709505944546686 1.0 +err_prefin 46256.8046875 +err_fin 24534.345703125 +sparsity check 0.3994404375553131 +time 67.84 +73 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5971108.5 +err_fin 4316203.0 +sparsity check 0.3999999931880406 +time 137.78 +73 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5658571.5 +err_fin 4075587.0 +sparsity check 0.3999999931880406 +time 138.01 +73 mlp.down_proj +Pruning ... +0.399999988930566 0.19999997317790985 0.34285713945116314 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917398757333 0.9709505944546686 1.0 +err_prefin 821471.8125 +err_fin 766429.5 +sparsity check 0.399999988930566 +time 135.77 +74 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 441981.6875 +err_fin 319593.8125 +sparsity check 0.3999999761581421 +time 74.99 +74 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 131631.90625 +err_fin 108456.46875 +sparsity check 0.39999985694885254 +time 1.36 +74 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 152674.359375 +err_fin 135078.734375 +sparsity check 0.39999985694885254 +time 1.32 +74 self_attn.o_proj +Pruning ... +0.39621762931346893 0.09621764719486237 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3381807222541005 0.9709505944546686 1.0 +err_prefin 98191.1640625 +err_fin 43221.46875 +sparsity check 0.39621762931346893 +time 67.83 +74 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 6117716.5 +err_fin 4367414.0 +sparsity check 0.3999999931880406 +time 137.78 +74 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5819283.0 +err_fin 4138981.0 +sparsity check 0.3999999931880406 +time 138.19 +74 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 906995.0 +err_fin 841319.5625 +sparsity check 0.3999999931880406 +time 135.34 +75 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 446136.75 +err_fin 319102.375 +sparsity check 0.3999999761581421 +time 74.99 +75 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 127815.8515625 +err_fin 102692.234375 +sparsity check 0.39999985694885254 +time 1.32 +75 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 181831.625 +err_fin 157889.078125 +sparsity check 0.39999985694885254 +time 1.31 +75 self_attn.o_proj +Pruning ... +0.3955359160900116 0.09553593397140503 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3359738332557134 0.9709505944546686 1.0 +err_prefin 91375.703125 +err_fin 40776.328125 +sparsity check 0.3955359160900116 +time 67.87 +75 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 6186535.0 +err_fin 4373899.0 +sparsity check 0.3999999931880406 +time 137.78 +75 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5882684.0 +err_fin 4144648.0 +sparsity check 0.3999999931880406 +time 138.19 +75 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1023284.6875 +err_fin 936964.875 +sparsity check 0.3999999931880406 +time 135.35 +76 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 681006.0625 +err_fin 474383.15625 +sparsity check 0.3999999761581421 +time 74.95 +76 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 187218.53125 +err_fin 149162.0625 +sparsity check 0.39999985694885254 +time 1.32 +76 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 274581.15625 +err_fin 235705.09375 +sparsity check 0.39999985694885254 +time 1.30 +76 self_attn.o_proj +Pruning ... +0.38502632081508636 0.0850263386964798 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3009376624713092 0.9709505944546686 1.0 +err_prefin 303418.03125 +err_fin 155106.640625 +sparsity check 0.38502632081508636 +time 67.87 +76 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 6181025.5 +err_fin 4269697.0 +sparsity check 0.3999999931880406 +time 137.79 +76 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5839910.0 +err_fin 4014858.0 +sparsity check 0.3999999931880406 +time 138.07 +76 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1254402.5 +err_fin 1118266.625 +sparsity check 0.3999999931880406 +time 135.33 +77 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 460530.625 +err_fin 307653.4375 +sparsity check 0.3999999761581421 +time 74.97 +77 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 136339.171875 +err_fin 103951.3203125 +sparsity check 0.39999985694885254 +time 1.32 +77 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 187703.84375 +err_fin 158275.296875 +sparsity check 0.39999985694885254 +time 1.33 +77 self_attn.o_proj +Pruning ... +0.3909844905138016 0.09098450839519501 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3210383461275814 0.9709505944546686 1.0 +err_prefin 134321.203125 +err_fin 50604.8671875 +sparsity check 0.3909844905138016 +time 67.90 +77 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5808654.0 +err_fin 3888417.5 +sparsity check 0.3999999931880406 +time 137.84 +77 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5495126.0 +err_fin 3658363.25 +sparsity check 0.3999999931880406 +time 138.21 +77 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1474809.0 +err_fin 1266817.5 +sparsity check 0.3999999931880406 +time 135.37 +78 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 445713.0 +err_fin 273131.4375 +sparsity check 0.3999999761581421 +time 74.95 +78 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 141628.875 +err_fin 101864.96875 +sparsity check 0.39999985694885254 +time 1.33 +78 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 162669.1875 +err_fin 140059.9375 +sparsity check 0.39999985694885254 +time 1.30 +78 self_attn.o_proj +Pruning ... +0.397164523601532 0.09716454148292542 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3412332918506629 0.9709505944546686 1.0 +err_prefin 93672.1171875 +err_fin 37403.00390625 +sparsity check 0.397164523601532 +time 67.88 +78 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4919987.0 +err_fin 3151665.5 +sparsity check 0.3999999931880406 +time 137.80 +78 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4608632.0 +err_fin 2935308.5 +sparsity check 0.3999999931880406 +time 138.19 +78 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1597177.75 +err_fin 1275748.25 +sparsity check 0.3999999931880406 +time 135.34 +79 self_attn.q_proj +Pruning ... +0.39998845756053925 0.09998847544193268 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502499379125963 0.9709505944546686 1.0 +err_prefin 290667.125 +err_fin 163923.90625 +sparsity check 0.39998845756053925 +time 74.94 +79 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 105166.34375 +err_fin 75287.1015625 +sparsity check 0.39999985694885254 +time 1.32 +79 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 74843.890625 +err_fin 59122.14453125 +sparsity check 0.39999985694885254 +time 1.30 +79 self_attn.o_proj +Pruning ... +0.3843609243631363 0.08436094224452972 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2986527357470568 0.9709505944546686 1.0 +err_prefin 27899.15234375 +err_fin 7628.357421875 +sparsity check 0.3843609243631363 +time 67.88 +79 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3068756.0 +err_fin 1793884.75 +sparsity check 0.3999999931880406 +time 137.82 +79 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2823493.25 +err_fin 1643121.375 +sparsity check 0.3999999931880406 +time 138.16 +79 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1458193.5 +err_fin 930175.0 +sparsity check 0.3999999931880406 +time 135.36 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(4.8578e-06) +model.layers.0.self_attn.k_proj.weight tensor(0.0338) +model.layers.0.self_attn.v_proj.weight tensor(0.0718) +model.layers.0.self_attn.o_proj.weight tensor(4.2766e-06) +model.layers.0.mlp.gate_proj.weight tensor(2.7333e-06) +model.layers.0.mlp.up_proj.weight tensor(2.9887e-06) +model.layers.0.mlp.down_proj.weight tensor(0.0261) +50957.80866575241 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 4.265176 diff --git a/logs/llama2-70-0.6-no-final b/logs/llama2-70-0.6-no-final new file mode 100644 index 0000000..743693c --- /dev/null +++ b/logs/llama2-70-0.6-no-final @@ -0,0 +1,2897 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17.612327575683594 +time 74.25 +0 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 31.51104736328125 +time 1.29 +0 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 5.1145734786987305 +time 1.29 +0 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 0.7482728362083435 +time 67.00 +0 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 472.1926574707031 +time 132.74 +0 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 473.89642333984375 +time 133.00 +0 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 8.996854782104492 +time 132.26 +1 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 322.709228515625 +time 74.37 +1 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 329.696533203125 +time 1.31 +1 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36.43440628051758 +time 1.31 +1 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 25.482433319091797 +time 67.22 +1 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3330.572265625 +time 133.13 +1 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3652.72802734375 +time 133.58 +1 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 204.40255737304688 +time 132.78 +2 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1244.264404296875 +time 74.41 +2 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 1398.383544921875 +time 1.33 +2 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 169.42843627929688 +time 1.32 +2 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 125.15476989746094 +time 67.33 +2 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 14816.5078125 +time 133.21 +2 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 15628.775390625 +time 133.57 +2 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 752.6139526367188 +time 132.81 +3 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10678.88671875 +time 74.34 +3 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7112.27734375 +time 1.34 +3 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 2342.0009765625 +time 1.32 +3 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 237.67080688476562 +time 67.23 +3 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 36961.9921875 +time 133.20 +3 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 37519.90625 +time 133.57 +3 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1129.751708984375 +time 132.77 +4 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 22656.783203125 +time 74.31 +4 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 10853.47265625 +time 1.32 +4 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 3980.1181640625 +time 1.32 +4 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 300.5199279785156 +time 67.29 +4 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 65355.2578125 +time 133.21 +4 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 64738.76953125 +time 133.56 +4 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1746.341796875 +time 132.74 +5 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 33657.921875 +time 74.28 +5 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 17652.955078125 +time 1.31 +5 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 5824.9716796875 +time 1.30 +5 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 491.3681640625 +time 67.19 +5 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 85074.578125 +time 133.13 +5 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 83678.984375 +time 133.49 +5 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2472.36328125 +time 132.72 +6 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 38911.2890625 +time 74.35 +6 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 19984.470703125 +time 1.34 +6 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 6915.70263671875 +time 1.32 +6 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 966.6387939453125 +time 67.27 +6 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 116079.0234375 +time 133.18 +6 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 113430.8984375 +time 133.54 +6 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3741.281494140625 +time 132.71 +7 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 63400.828125 +time 74.18 +7 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30530.984375 +time 1.31 +7 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9929.8046875 +time 1.29 +7 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1133.021484375 +time 67.14 +7 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 146139.15625 +time 132.95 +7 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 142522.75 +time 133.31 +7 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 5315.3740234375 +time 132.51 +8 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 55419.0 +time 74.18 +8 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26642.69921875 +time 1.31 +8 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9577.478515625 +time 1.31 +8 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1881.4942626953125 +time 67.13 +8 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 205496.90625 +time 132.88 +8 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 196712.40625 +time 133.16 +8 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 9158.251953125 +time 132.47 +9 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 62077.3515625 +time 74.20 +9 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30216.158203125 +time 1.33 +9 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14768.783203125 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1096.98828125 +time 67.09 +9 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 267625.78125 +time 132.90 +9 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 254397.75 +time 133.21 +9 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 9453.677734375 +time 132.43 +10 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36566.9453125 +time 74.19 +10 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14957.2578125 +time 1.32 +10 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14397.6748046875 +time 1.32 +10 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 745.6788940429688 +time 67.11 +10 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 330457.625 +time 132.92 +10 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 312773.96875 +time 133.27 +10 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 10327.0927734375 +time 132.47 +11 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59660.21875 +time 74.18 +11 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30841.203125 +time 1.31 +11 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16662.66015625 +time 1.30 +11 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1191.482421875 +time 67.06 +11 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 357958.4375 +time 132.88 +11 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 340149.3125 +time 133.22 +11 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 11469.671875 +time 132.46 +12 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 51142.75 +time 74.14 +12 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27139.501953125 +time 1.32 +12 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 12346.076171875 +time 1.30 +12 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1558.240478515625 +time 67.09 +12 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 354703.1875 +time 132.86 +12 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 339935.875 +time 133.19 +12 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 12818.328125 +time 132.46 +13 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 65248.85546875 +time 74.20 +13 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29296.85546875 +time 1.31 +13 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16889.134765625 +time 1.30 +13 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3927.85791015625 +time 67.10 +13 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 375438.25 +time 132.88 +13 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 363148.0625 +time 133.23 +13 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 16453.6328125 +time 132.43 +14 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 108937.71875 +time 74.18 +14 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49773.47265625 +time 1.31 +14 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23565.927734375 +time 1.30 +14 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4765.5234375 +time 67.15 +14 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 437450.4375 +time 132.88 +14 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 422844.59375 +time 133.26 +14 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 17558.125 +time 132.46 +15 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 79193.8984375 +time 74.16 +15 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 34913.6875 +time 1.33 +15 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29505.78515625 +time 1.30 +15 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4935.31298828125 +time 67.10 +15 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 467670.09375 +time 132.93 +15 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 454470.0 +time 133.30 +15 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 20961.91796875 +time 132.56 +16 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 77807.2890625 +time 74.20 +16 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36305.625 +time 1.33 +16 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27183.01171875 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4128.67822265625 +time 67.15 +16 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 495834.625 +time 133.00 +16 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 483090.375 +time 133.37 +16 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 20891.5625 +time 132.53 +17 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100351.65625 +time 74.21 +17 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 47200.68359375 +time 1.32 +17 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23141.37890625 +time 1.31 +17 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6387.916015625 +time 67.16 +17 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 486892.5 +time 132.90 +17 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 475838.3125 +time 133.24 +17 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 27100.16015625 +time 132.38 +18 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 93711.421875 +time 74.16 +18 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41831.859375 +time 1.31 +18 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36948.33203125 +time 1.31 +18 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6310.619140625 +time 67.14 +18 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 538415.4375 +time 132.90 +18 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 523063.0625 +time 133.15 +18 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 29556.50390625 +time 132.49 +19 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 113524.328125 +time 74.19 +19 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49532.1015625 +time 1.30 +19 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 44482.59375 +time 1.31 +19 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5303.1357421875 +time 67.16 +19 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 585978.25 +time 132.88 +19 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 569840.875 +time 133.26 +19 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 29731.625 +time 132.52 +20 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100755.21875 +time 74.21 +20 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42360.0625 +time 1.33 +20 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36447.08203125 +time 1.32 +20 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3984.774658203125 +time 67.17 +20 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 633971.25 +time 132.90 +20 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 617294.625 +time 133.20 +20 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 30067.0078125 +time 132.44 +21 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 61406.9296875 +time 74.19 +21 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23238.681640625 +time 1.32 +21 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 31092.671875 +time 1.30 +21 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5884.35009765625 +time 67.08 +21 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 661759.1875 +time 132.86 +21 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 647559.75 +time 133.24 +21 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 31720.4453125 +time 132.47 +22 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59322.75 +time 74.20 +22 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27160.921875 +time 1.31 +22 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 35606.453125 +time 1.31 +22 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4092.3671875 +time 67.15 +22 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 697785.9375 +time 132.88 +22 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 683844.1875 +time 133.22 +22 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 32690.904296875 +time 132.37 +23 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 87082.828125 +time 74.03 +23 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 43012.2578125 +time 1.30 +23 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37611.08203125 +time 1.30 +23 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5409.3359375 +time 67.00 +23 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 718901.1875 +time 132.71 +23 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 706408.125 +time 133.06 +23 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 33584.59375 +time 132.28 +24 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 77167.984375 +time 74.24 +24 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37746.296875 +time 1.31 +24 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27919.7109375 +time 1.30 +24 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4681.7431640625 +time 67.21 +24 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 699628.9375 +time 133.10 +24 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 691800.0 +time 133.35 +24 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 33803.04296875 +time 132.64 +25 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 87325.171875 +time 74.24 +25 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42500.3125 +time 1.31 +25 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29050.744140625 +time 1.30 +25 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11818.7099609375 +time 67.20 +25 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 654517.125 +time 133.11 +25 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 651695.375 +time 133.45 +25 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 47636.0859375 +time 132.68 +26 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 130363.6328125 +time 74.30 +26 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 62085.8203125 +time 1.30 +26 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 43831.43359375 +time 1.31 +26 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12798.46875 +time 67.24 +26 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 748994.75 +time 133.22 +26 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 742715.75 +time 133.55 +26 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 46746.4375 +time 132.76 +27 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 57723.2578125 +time 74.30 +27 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27932.28515625 +time 1.31 +27 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37825.0703125 +time 1.32 +27 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5532.9150390625 +time 67.29 +27 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 795661.875 +time 133.23 +27 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 788209.0 +time 133.57 +27 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 49022.3125 +time 132.79 +28 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 106256.9921875 +time 74.18 +28 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 54390.6484375 +time 1.34 +28 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 57257.546875 +time 1.30 +28 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9016.744140625 +time 67.15 +28 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 840818.375 +time 132.94 +28 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 833063.1875 +time 133.19 +28 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 51601.015625 +time 132.47 +29 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104595.203125 +time 74.29 +29 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 47897.2421875 +time 1.31 +29 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 62577.046875 +time 1.30 +29 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10879.46484375 +time 67.25 +29 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 892475.9375 +time 133.22 +29 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 882220.875 +time 133.58 +29 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 54406.44140625 +time 132.81 +30 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 52978.29296875 +time 74.31 +30 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 20202.720703125 +time 1.33 +30 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 57756.62890625 +time 1.31 +30 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5304.2724609375 +time 67.23 +30 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 946987.125 +time 133.20 +30 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 933448.5625 +time 133.55 +30 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 56272.2265625 +time 132.77 +31 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 109427.0546875 +time 74.29 +31 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 48981.40625 +time 1.32 +31 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61405.2109375 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10939.416015625 +time 66.95 +31 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 991568.625 +time 132.51 +31 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 975173.8125 +time 132.88 +31 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 57463.484375 +time 132.16 +32 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 126923.3125 +time 74.24 +32 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 59173.9296875 +time 1.32 +32 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 55641.9375 +time 1.30 +32 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7182.63232421875 +time 67.17 +32 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1016079.75 +time 132.95 +32 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1001333.125 +time 133.21 +32 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 60604.0625 +time 132.44 +33 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 180704.390625 +time 74.20 +33 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 78881.8984375 +time 1.33 +33 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61134.00390625 +time 1.31 +33 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14254.791015625 +time 67.10 +33 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1024896.375 +time 132.88 +33 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1012723.375 +time 133.22 +33 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 69529.09375 +time 132.36 +34 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 61557.296875 +time 74.18 +34 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 25669.53515625 +time 1.36 +34 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36394.3125 +time 1.32 +34 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9210.3095703125 +time 67.13 +34 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1058720.0 +time 132.87 +34 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1045177.75 +time 133.11 +34 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 75892.5 +time 132.35 +35 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104925.34375 +time 74.15 +35 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42556.8515625 +time 1.31 +35 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49808.2109375 +time 1.31 +35 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11742.796875 +time 67.12 +35 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1124256.875 +time 132.88 +35 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1110023.5 +time 133.16 +35 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 80856.0234375 +time 132.38 +36 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100388.4609375 +time 74.01 +36 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41796.96484375 +time 1.32 +36 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 48720.4453125 +time 1.30 +36 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7499.4580078125 +time 66.95 +36 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1149486.25 +time 132.54 +36 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1136073.5 +time 133.03 +36 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 90240.140625 +time 132.09 +37 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 160800.546875 +time 74.09 +37 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69916.5625 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 62135.01171875 +time 1.30 +37 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 13930.455078125 +time 67.07 +37 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1199847.875 +time 132.88 +37 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1185947.0 +time 133.22 +37 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 98375.5859375 +time 132.39 +38 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 180662.359375 +time 74.13 +38 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77058.484375 +time 1.32 +38 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 64838.8828125 +time 1.29 +38 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 22819.05859375 +time 67.12 +38 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1234275.625 +time 132.86 +38 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1223039.125 +time 133.16 +38 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 110680.078125 +time 132.36 +39 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 221250.59375 +time 74.14 +39 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 102464.453125 +time 1.33 +39 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 70258.5 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 26567.326171875 +time 67.09 +39 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1243297.75 +time 132.83 +39 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1233616.125 +time 133.19 +39 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 135681.84375 +time 132.38 +40 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 206396.625 +time 74.00 +40 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 88803.6953125 +time 1.31 +40 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 80005.4140625 +time 1.30 +40 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 39003.3046875 +time 67.00 +40 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1288138.25 +time 132.61 +40 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1276775.25 +time 133.00 +40 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 154915.09375 +time 131.79 +41 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 166153.84375 +time 74.16 +41 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69553.84375 +time 1.32 +41 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69993.46875 +time 1.31 +41 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 43492.41015625 +time 67.08 +41 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1337140.875 +time 132.84 +41 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1305336.0 +time 133.29 +41 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 183269.5625 +time 132.45 +42 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 192392.625 +time 74.18 +42 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75512.3359375 +time 1.32 +42 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86431.2265625 +time 1.31 +42 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 39116.44140625 +time 67.09 +42 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1442253.25 +time 132.84 +42 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1377874.0 +time 133.33 +42 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 197992.65625 +time 132.48 +43 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 132773.1875 +time 74.17 +43 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61471.68359375 +time 1.33 +43 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 67454.6875 +time 1.30 +43 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 29826.2265625 +time 67.10 +43 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1498712.75 +time 132.86 +43 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1418535.25 +time 133.23 +43 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 219049.484375 +time 132.40 +44 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 240496.484375 +time 74.17 +44 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 79229.8359375 +time 1.32 +44 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 104943.875 +time 1.30 +44 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 67515.375 +time 67.12 +44 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1641352.375 +time 132.86 +44 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1503583.0 +time 133.21 +44 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 256954.328125 +time 132.38 +45 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 150158.953125 +time 74.16 +45 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 64959.00390625 +time 1.33 +45 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 102122.0 +time 1.31 +45 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 21533.6875 +time 67.08 +45 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1778674.25 +time 132.87 +45 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1609157.75 +time 133.24 +45 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 263130.34375 +time 132.38 +46 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 126961.8515625 +time 74.16 +46 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 51756.03515625 +time 1.33 +46 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 88172.375 +time 1.30 +46 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 28422.970703125 +time 67.09 +46 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1911585.25 +time 132.88 +46 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1703843.375 +time 133.23 +46 mlp.down_proj +Pruning ... +0.39999998467309134 0.2499999701976776 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1452538745832397 0.9709505944546686 1.0 +err_prefin 263536.6875 +time 132.29 +47 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 173263.8125 +time 74.18 +47 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75120.765625 +time 1.32 +47 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 85955.203125 +time 1.31 +47 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 42869.1015625 +time 67.13 +47 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1978742.5 +time 132.83 +47 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1741335.5 +time 133.19 +47 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 297206.8125 +time 132.49 +48 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 85451.9375 +time 74.18 +48 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 28746.822265625 +time 1.32 +48 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 83795.6484375 +time 1.30 +48 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 28718.93359375 +time 67.13 +48 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2072749.25 +time 132.88 +48 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1816530.75 +time 133.19 +48 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 296262.71875 +time 132.30 +49 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 47280.34375 +time 74.17 +49 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 19258.259765625 +time 1.32 +49 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 67885.0 +time 1.31 +49 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10816.5068359375 +time 67.11 +49 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2155661.5 +time 132.85 +49 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1886680.625 +time 133.26 +49 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 296223.4375 +time 132.35 +50 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 76488.140625 +time 74.18 +50 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 25997.84375 +time 1.32 +50 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 87406.3125 +time 1.30 +50 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 21289.626953125 +time 67.10 +50 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2241165.5 +time 132.87 +50 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1952781.5 +time 133.25 +50 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 297756.46875 +time 132.38 +51 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 99703.515625 +time 74.17 +51 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38889.2421875 +time 1.32 +51 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 91222.671875 +time 1.31 +51 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19600.505859375 +time 67.12 +51 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2306488.75 +time 132.85 +51 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2014633.75 +time 133.21 +51 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 297955.40625 +time 132.35 +52 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 168291.375 +time 74.16 +52 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 50932.2890625 +time 1.33 +52 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 104135.4375 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36862.6953125 +time 67.13 +52 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2366128.0 +time 132.86 +52 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2063428.875 +time 133.21 +52 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 304441.96875 +time 132.46 +53 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 50769.15625 +time 74.16 +53 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 15295.44140625 +time 1.32 +53 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 87299.375 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 15707.03125 +time 67.08 +53 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2430075.0 +time 132.86 +53 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2123971.5 +time 133.18 +53 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 307674.5 +time 132.34 +54 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59797.8046875 +time 74.17 +54 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 17676.337890625 +time 1.31 +54 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 89841.21875 +time 1.31 +54 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17624.03515625 +time 67.13 +54 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2491099.5 +time 132.86 +54 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2182816.75 +time 133.24 +54 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 307152.4375 +time 132.47 +55 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 98281.03125 +time 74.13 +55 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 34463.125 +time 1.32 +55 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 117563.0 +time 1.30 +55 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 35912.5390625 +time 67.05 +55 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2539226.0 +time 132.86 +55 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2240838.25 +time 133.19 +55 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 314594.53125 +time 132.37 +56 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 73793.3125 +time 74.18 +56 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27353.845703125 +time 1.31 +56 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 82864.546875 +time 1.29 +56 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 18498.107421875 +time 67.12 +56 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2577073.5 +time 132.84 +56 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2278607.5 +time 133.10 +56 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 318420.875 +time 132.37 +57 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 65738.078125 +time 74.16 +57 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 20980.23828125 +time 1.33 +57 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 106542.828125 +time 1.30 +57 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19532.16796875 +time 67.11 +57 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2664942.0 +time 132.87 +57 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2360458.5 +time 133.24 +57 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 323856.625 +time 132.46 +58 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36086.94921875 +time 74.19 +58 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 10523.658203125 +time 1.33 +58 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 76194.921875 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12682.658203125 +time 67.13 +58 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2722121.0 +time 132.89 +58 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2416679.5 +time 133.28 +58 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 325329.375 +time 132.41 +59 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 46806.7265625 +time 74.15 +59 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 13380.953125 +time 1.32 +59 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 84532.6875 +time 1.31 +59 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12104.3583984375 +time 67.07 +59 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2788820.75 +time 132.85 +59 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2478200.5 +time 133.25 +59 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 324977.9375 +time 132.39 +60 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7371.5859375 +time 74.18 +60 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 1918.373779296875 +time 1.32 +60 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 45889.3359375 +time 1.31 +60 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7125.640625 +time 67.15 +60 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2827728.0 +time 132.91 +60 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2526013.25 +time 133.28 +60 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 322436.03125 +time 131.98 +61 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 29236.283203125 +time 74.19 +61 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7915.310546875 +time 1.31 +61 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 78067.921875 +time 1.30 +61 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11780.419921875 +time 67.12 +61 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2881668.25 +time 132.90 +61 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2586985.5 +time 133.26 +61 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 323645.25 +time 132.36 +62 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 31343.787109375 +time 74.19 +62 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9455.978515625 +time 1.33 +62 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 84606.421875 +time 1.31 +62 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9645.7236328125 +time 67.06 +62 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2944337.75 +time 132.85 +62 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2652504.0 +time 133.21 +62 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 326877.1875 +time 132.49 +63 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 28740.30078125 +time 74.16 +63 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7151.73046875 +time 1.31 +63 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 65387.8359375 +time 1.30 +63 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 8003.251953125 +time 67.08 +63 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3011278.0 +time 132.83 +63 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2716184.5 +time 133.23 +63 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 330764.0625 +time 132.41 +64 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 54376.578125 +time 74.14 +64 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16704.046875 +time 1.32 +64 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 95303.6953125 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 16367.1484375 +time 67.06 +64 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3071841.0 +time 132.85 +64 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2780696.5 +time 133.23 +64 mlp.down_proj +Pruning ... +0.39999998467309134 0.2499999850988388 0.32857141750199453 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253876941607 0.9709505944546686 1.0 +err_prefin 335757.34375 +time 132.46 +65 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14479.54296875 +time 74.15 +65 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 3315.9599609375 +time 1.31 +65 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 52430.859375 +time 1.30 +65 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6687.37158203125 +time 67.10 +65 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3122902.0 +time 132.85 +65 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2839093.5 +time 133.20 +65 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 340854.46875 +time 132.36 +66 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24763.19921875 +time 74.15 +66 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 6424.70751953125 +time 1.31 +66 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69690.8828125 +time 1.30 +66 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14345.8642578125 +time 67.12 +66 mlp.gate_proj +Pruning ... +0.39999998467309134 0.2499999701976776 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1452538745832397 0.9709505944546686 1.0 +err_prefin 3198961.75 +time 132.83 +66 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2925467.0 +time 133.20 +66 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 357014.3125 +time 132.38 +67 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11864.314453125 +time 74.16 +67 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 2948.201171875 +time 1.32 +67 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27720.13671875 +time 1.31 +67 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4095.9384765625 +time 67.09 +67 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3223908.25 +time 132.86 +67 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2971735.25 +time 133.20 +67 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 357864.375 +time 132.34 +68 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104560.09375 +time 74.17 +68 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38269.0546875 +time 1.33 +68 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 137282.203125 +time 1.30 +68 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 15658.8642578125 +time 67.14 +68 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3347816.0 +time 132.88 +68 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3099588.75 +time 133.25 +68 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 384504.1875 +time 132.38 +69 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 179176.3125 +time 74.15 +69 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86904.4140625 +time 1.32 +69 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 180676.28125 +time 1.30 +69 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 25021.240234375 +time 67.09 +69 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3413536.0 +time 132.88 +69 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3192965.0 +time 133.26 +69 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 403899.0 +time 132.39 +70 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100739.75 +time 74.20 +70 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38826.25 +time 1.33 +70 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 102769.8203125 +time 1.30 +70 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 18622.0390625 +time 67.12 +70 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3491604.5 +time 132.86 +70 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3285257.5 +time 133.36 +70 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 431766.75 +time 132.32 +71 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 139430.90625 +time 74.17 +71 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 65701.375 +time 1.31 +71 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 110579.6484375 +time 1.30 +71 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24041.388671875 +time 67.11 +71 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3603060.75 +time 132.87 +71 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3395588.5 +time 133.19 +71 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 456766.0 +time 132.33 +72 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 196384.375 +time 74.21 +72 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 92021.578125 +time 1.32 +72 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 129053.34375 +time 1.31 +72 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 31116.056640625 +time 67.11 +72 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3686122.5 +time 132.90 +72 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3498898.5 +time 133.15 +72 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 491849.6875 +time 132.31 +73 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 182664.953125 +time 74.16 +73 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 89948.421875 +time 1.33 +73 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 147008.59375 +time 1.31 +73 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 20152.54296875 +time 67.12 +73 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3775901.0 +time 132.87 +73 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3598868.5 +time 133.26 +73 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 542224.375 +time 132.44 +74 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 165940.3125 +time 74.19 +74 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75133.015625 +time 1.33 +74 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 111994.921875 +time 1.31 +74 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 41080.0625 +time 67.13 +74 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3864722.0 +time 132.88 +74 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3695610.5 +time 133.24 +74 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 599601.125 +time 132.37 +75 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 168422.0625 +time 74.16 +75 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 73985.390625 +time 1.32 +75 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 126582.671875 +time 1.29 +75 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 37896.7421875 +time 67.14 +75 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3896304.0 +time 132.89 +75 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3721670.0 +time 133.25 +75 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 673825.3125 +time 132.42 +76 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 253578.78125 +time 74.18 +76 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 108152.5625 +time 1.32 +76 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 202025.6875 +time 1.30 +76 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 124319.3671875 +time 67.15 +76 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3856496.5 +time 132.88 +76 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3656621.0 +time 133.26 +76 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 813325.5 +time 132.37 +77 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 167027.765625 +time 74.20 +77 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77202.765625 +time 1.31 +77 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 135316.703125 +time 1.31 +77 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 54281.9140625 +time 67.13 +77 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3588503.25 +time 132.91 +77 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3403949.5 +time 133.28 +77 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 934467.25 +time 132.35 +78 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 148941.203125 +time 74.19 +78 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 73460.6328125 +time 1.33 +78 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 119373.671875 +time 1.31 +78 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 33232.4609375 +time 67.18 +78 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2989115.0 +time 132.91 +78 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2800694.0 +time 133.28 +78 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 982731.75 +time 132.28 +79 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 93165.2265625 +time 74.18 +79 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 57591.75 +time 1.34 +79 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 52218.36328125 +time 1.31 +79 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9484.1318359375 +time 67.13 +79 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1811554.75 +time 132.96 +79 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1657092.0 +time 133.33 +79 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 818149.125 +time 132.49 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0106) +model.layers.0.self_attn.k_proj.weight tensor(0.0180) +model.layers.0.self_attn.v_proj.weight tensor(0.0582) +model.layers.0.self_attn.o_proj.weight tensor(4.1723e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0088) +49795.538183927536 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.814271 diff --git a/logs/llama2-70-0.7 b/logs/llama2-70-0.7 new file mode 100644 index 0000000..e7c8c9a --- /dev/null +++ b/logs/llama2-70-0.7 @@ -0,0 +1,4017 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 31.5526065826416 +err_fin 13.801063537597656 +sparsity check 0.2999999672174454 +time 75.95 +0 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52.44609832763672 +err_fin 19.002490997314453 +sparsity check 0.2999997138977051 +time 1.32 +0 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 11.234882354736328 +err_fin 6.049235820770264 +sparsity check 0.2999997138977051 +time 1.32 +0 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2.254166603088379 +err_fin 0.19925557076931 +sparsity check 0.2999999672174454 +time 68.70 +0 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 998.2425537109375 +err_fin 420.0853271484375 +sparsity check 0.2999999906335558 +time 138.62 +0 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1007.9627075195312 +err_fin 419.9471130371094 +sparsity check 0.2999999906335558 +time 138.91 +0 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 19.838790893554688 +err_fin 9.329728126525879 +sparsity check 0.2999999906335558 +time 136.67 +1 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 559.4142456054688 +err_fin 195.34976196289062 +sparsity check 0.2999999672174454 +time 75.94 +1 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 666.58203125 +err_fin 232.85142517089844 +sparsity check 0.2999997138977051 +time 1.34 +1 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 80.73096466064453 +err_fin 40.32262420654297 +sparsity check 0.2999997138977051 +time 1.34 +1 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66.36351776123047 +err_fin 16.87843132019043 +sparsity check 0.2999999672174454 +time 68.82 +1 mlp.gate_proj +Pruning ... +0.2999999863760812 0.2499999850988388 0.2285714192049844 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063847936203 0.9709505944546686 1.0 +err_prefin 7393.64404296875 +err_fin 2988.91845703125 +sparsity check 0.2999999863760812 +time 138.78 +1 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 8189.357421875 +err_fin 3221.509521484375 +sparsity check 0.2999999906335558 +time 139.09 +1 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 407.28009033203125 +err_fin 318.79150390625 +sparsity check 0.2999999906335558 +time 136.83 +2 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2488.8447265625 +err_fin 1380.255859375 +sparsity check 0.2999999672174454 +time 76.06 +2 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 2615.38037109375 +err_fin 1290.1136474609375 +sparsity check 0.2999997138977051 +time 1.35 +2 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 355.3388671875 +err_fin 225.45562744140625 +sparsity check 0.2999997138977051 +time 1.34 +2 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 306.25860595703125 +err_fin 133.38070678710938 +sparsity check 0.2999999672174454 +time 68.94 +2 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 32337.75390625 +err_fin 16730.00390625 +sparsity check 0.2999999906335558 +time 138.81 +2 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 34219.4921875 +err_fin 17309.54296875 +sparsity check 0.2999999906335558 +time 139.17 +2 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1462.96923828125 +err_fin 1230.7198486328125 +sparsity check 0.2999999906335558 +time 136.94 +3 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 23905.779296875 +err_fin 15115.546875 +sparsity check 0.2999999672174454 +time 75.97 +3 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 14812.9462890625 +err_fin 9731.453125 +sparsity check 0.2999997138977051 +time 1.38 +3 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 4812.263671875 +err_fin 3350.51123046875 +sparsity check 0.2999997138977051 +time 1.33 +3 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 553.1149291992188 +err_fin 313.97454833984375 +sparsity check 0.2999999672174454 +time 68.91 +3 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 78134.578125 +err_fin 46317.03125 +sparsity check 0.2999999906335558 +time 138.82 +3 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 79425.5546875 +err_fin 46321.234375 +sparsity check 0.2999999906335558 +time 139.23 +3 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2157.4443359375 +err_fin 1896.1953125 +sparsity check 0.2999999906335558 +time 136.80 +4 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 49349.1171875 +err_fin 32225.453125 +sparsity check 0.2999999672174454 +time 75.93 +4 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 21972.5625 +err_fin 15072.583984375 +sparsity check 0.2999997138977051 +time 1.35 +4 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 7872.7421875 +err_fin 5850.16162109375 +sparsity check 0.2999997138977051 +time 1.34 +4 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 687.8417358398438 +err_fin 407.13275146484375 +sparsity check 0.2999999672174454 +time 68.89 +4 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 134429.28125 +err_fin 86310.09375 +sparsity check 0.2999999906335558 +time 138.78 +4 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 133324.328125 +err_fin 84613.71875 +sparsity check 0.2999999906335558 +time 139.11 +4 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3295.36669921875 +err_fin 2941.990966796875 +sparsity check 0.2999999906335558 +time 136.62 +5 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 74200.90625 +err_fin 51407.30859375 +sparsity check 0.2999999672174454 +time 75.89 +5 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 35844.0234375 +err_fin 26020.1484375 +sparsity check 0.2999997138977051 +time 1.36 +5 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 11364.1396484375 +err_fin 8919.3125 +sparsity check 0.2999997138977051 +time 1.32 +5 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 1060.3212890625 +err_fin 600.115966796875 +sparsity check 0.2999999672174454 +time 68.86 +5 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 171220.1875 +err_fin 115109.125 +sparsity check 0.2999999906335558 +time 138.81 +5 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 168897.5625 +err_fin 112407.5625 +sparsity check 0.2999999906335558 +time 139.11 +5 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4605.4111328125 +err_fin 4083.410400390625 +sparsity check 0.2999999906335558 +time 136.74 +6 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 84691.625 +err_fin 60449.29296875 +sparsity check 0.2999999672174454 +time 75.95 +6 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 39424.7890625 +err_fin 29796.72265625 +sparsity check 0.2999997138977051 +time 1.35 +6 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 13354.880859375 +err_fin 10822.541015625 +sparsity check 0.2999997138977051 +time 1.33 +6 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2051.59423828125 +err_fin 1327.2574462890625 +sparsity check 0.2999999672174454 +time 68.87 +6 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 230577.90625 +err_fin 158950.140625 +sparsity check 0.2999999906335558 +time 138.78 +6 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 225769.265625 +err_fin 154366.09375 +sparsity check 0.2999999906335558 +time 139.15 +6 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6892.95654296875 +err_fin 6190.93994140625 +sparsity check 0.2999999906335558 +time 136.72 +7 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 138298.125 +err_fin 103012.3984375 +sparsity check 0.2999999672174454 +time 75.95 +7 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 59487.4140625 +err_fin 47204.64453125 +sparsity check 0.2999997138977051 +time 1.36 +7 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 18909.47265625 +err_fin 15829.427734375 +sparsity check 0.2999997138977051 +time 1.33 +7 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2414.797607421875 +err_fin 1489.6678466796875 +sparsity check 0.2999999672174454 +time 68.86 +7 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 287567.875 +err_fin 203346.28125 +sparsity check 0.2999999906335558 +time 138.78 +7 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 280876.90625 +err_fin 197131.953125 +sparsity check 0.2999999906335558 +time 139.12 +7 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 9785.451171875 +err_fin 8780.009765625 +sparsity check 0.2999999906335558 +time 136.69 +8 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 119541.5546875 +err_fin 90286.796875 +sparsity check 0.2999999672174454 +time 75.92 +8 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52394.3125 +err_fin 41045.8828125 +sparsity check 0.2999997138977051 +time 1.34 +8 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 18216.1171875 +err_fin 15168.751953125 +sparsity check 0.2999997138977051 +time 1.34 +8 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 4256.765625 +err_fin 2574.2548828125 +sparsity check 0.2999999672174454 +time 68.84 +8 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 400993.75 +err_fin 292364.25 +sparsity check 0.2999999906335558 +time 138.76 +8 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 384228.6875 +err_fin 278253.0 +sparsity check 0.2999999906335558 +time 139.13 +8 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 16458.263671875 +err_fin 15318.830078125 +sparsity check 0.2999999906335558 +time 136.74 +9 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 132341.046875 +err_fin 105049.015625 +sparsity check 0.2999999672174454 +time 75.96 +9 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57340.03125 +err_fin 48335.9765625 +sparsity check 0.2999997138977051 +time 1.38 +9 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 27674.234375 +err_fin 24191.828125 +sparsity check 0.2999997138977051 +time 1.33 +9 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2507.3154296875 +err_fin 1418.0616455078125 +sparsity check 0.2999999672174454 +time 68.84 +9 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 518084.46875 +err_fin 386963.375 +sparsity check 0.2999999906335558 +time 138.81 +9 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 493708.6875 +err_fin 366268.28125 +sparsity check 0.2999999906335558 +time 139.11 +9 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 17603.853515625 +err_fin 16046.490234375 +sparsity check 0.2999999906335558 +time 136.75 +10 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 74944.375 +err_fin 59686.69921875 +sparsity check 0.2999999672174454 +time 75.98 +10 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 30239.62890625 +err_fin 24843.46875 +sparsity check 0.2999997138977051 +time 1.34 +10 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 26962.50390625 +err_fin 23496.552734375 +sparsity check 0.2999997138977051 +time 1.34 +10 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 1685.6614990234375 +err_fin 1013.326904296875 +sparsity check 0.2999999672174454 +time 68.85 +10 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 635107.875 +err_fin 491833.0625 +sparsity check 0.2999999906335558 +time 138.80 +10 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 602855.25 +err_fin 464191.0625 +sparsity check 0.2999999906335558 +time 139.15 +10 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 19038.14453125 +err_fin 17591.33984375 +sparsity check 0.2999999906335558 +time 136.66 +11 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 125451.3046875 +err_fin 100865.71875 +sparsity check 0.2999999672174454 +time 75.95 +11 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 59601.6796875 +err_fin 49557.6640625 +sparsity check 0.2999997138977051 +time 1.37 +11 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 31191.890625 +err_fin 27034.7109375 +sparsity check 0.2999997138977051 +time 1.33 +11 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2637.687255859375 +err_fin 1539.096923828125 +sparsity check 0.2999999672174454 +time 68.86 +11 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 684320.9375 +err_fin 534864.75 +sparsity check 0.2999999906335558 +time 138.80 +11 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 652150.8125 +err_fin 507336.3125 +sparsity check 0.2999999906335558 +time 139.10 +11 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 21235.642578125 +err_fin 19640.88671875 +sparsity check 0.2999999906335558 +time 136.72 +12 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 107555.765625 +err_fin 86075.53125 +sparsity check 0.2999999672174454 +time 75.97 +12 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52001.375 +err_fin 42313.7890625 +sparsity check 0.2999997138977051 +time 1.35 +12 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 23119.60546875 +err_fin 19647.97265625 +sparsity check 0.2999997138977051 +time 1.34 +12 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 3262.0234375 +err_fin 1880.873046875 +sparsity check 0.2999999672174454 +time 68.89 +12 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 677572.875 +err_fin 530410.9375 +sparsity check 0.2999999906335558 +time 138.82 +12 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 651519.0625 +err_fin 507847.25 +sparsity check 0.2999999906335558 +time 139.11 +12 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 23535.591796875 +err_fin 21650.58984375 +sparsity check 0.2999999906335558 +time 136.72 +13 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 135881.90625 +err_fin 109252.109375 +sparsity check 0.2999999672174454 +time 75.92 +13 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57302.16796875 +err_fin 47143.3359375 +sparsity check 0.2999997138977051 +time 1.37 +13 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 31665.98046875 +err_fin 27384.41796875 +sparsity check 0.2999997138977051 +time 1.32 +13 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8571.7724609375 +err_fin 5393.1240234375 +sparsity check 0.2999999672174454 +time 68.82 +13 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 715876.1875 +err_fin 553550.3125 +sparsity check 0.2999999906335558 +time 139.03 +13 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 694280.5 +err_fin 534421.25 +sparsity check 0.2999999906335558 +time 139.33 +13 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 30492.3515625 +err_fin 28018.44921875 +sparsity check 0.2999999906335558 +time 136.91 +14 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 233549.734375 +err_fin 189442.5 +sparsity check 0.2999999672174454 +time 76.14 +14 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 95989.8828125 +err_fin 79835.6796875 +sparsity check 0.2999997138977051 +time 1.36 +14 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 44033.52734375 +err_fin 38565.3984375 +sparsity check 0.2999997138977051 +time 1.35 +14 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 9570.501953125 +err_fin 5837.42431640625 +sparsity check 0.2999999672174454 +time 69.01 +14 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 833335.25 +err_fin 657834.4375 +sparsity check 0.2999999906335558 +time 139.19 +14 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 808530.1875 +err_fin 635628.875 +sparsity check 0.2999999906335558 +time 139.55 +14 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 32437.970703125 +err_fin 30035.171875 +sparsity check 0.2999999906335558 +time 137.05 +15 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 165543.4375 +err_fin 135696.78125 +sparsity check 0.2999999672174454 +time 76.16 +15 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 67497.59375 +err_fin 57078.5703125 +sparsity check 0.2999997138977051 +time 1.36 +15 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 55109.40234375 +err_fin 48422.890625 +sparsity check 0.2999997138977051 +time 1.35 +15 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 10721.1318359375 +err_fin 6648.1845703125 +sparsity check 0.2999999672174454 +time 69.04 +15 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 890758.75 +err_fin 703880.0625 +sparsity check 0.2999999906335558 +time 139.18 +15 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 868523.375 +err_fin 682637.25 +sparsity check 0.2999999906335558 +time 139.56 +15 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 38626.4609375 +err_fin 35541.078125 +sparsity check 0.2999999906335558 +time 137.10 +16 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 161715.25 +err_fin 134021.0625 +sparsity check 0.2999999672174454 +time 76.14 +16 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 71157.5390625 +err_fin 60193.890625 +sparsity check 0.2999997138977051 +time 1.37 +16 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 50627.34765625 +err_fin 44834.8359375 +sparsity check 0.2999997138977051 +time 1.35 +16 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8355.0703125 +err_fin 5260.6513671875 +sparsity check 0.2999999672174454 +time 69.03 +16 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 943084.25 +err_fin 753712.125 +sparsity check 0.2999999906335558 +time 139.15 +16 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 921673.875 +err_fin 734021.4375 +sparsity check 0.2999999906335558 +time 139.48 +16 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 38418.8125 +err_fin 35650.9375 +sparsity check 0.2999999906335558 +time 137.06 +17 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 212698.5625 +err_fin 174037.84375 +sparsity check 0.2999999672174454 +time 76.04 +17 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 91215.4375 +err_fin 76150.0 +sparsity check 0.2999997138977051 +time 1.36 +17 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 43756.53125 +err_fin 37838.7734375 +sparsity check 0.2999997138977051 +time 1.33 +17 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14142.9765625 +err_fin 7767.17138671875 +sparsity check 0.2999999672174454 +time 68.93 +17 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 925419.9375 +err_fin 726249.9375 +sparsity check 0.2999999906335558 +time 139.10 +17 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 907551.6875 +err_fin 708821.875 +sparsity check 0.2999999906335558 +time 139.44 +17 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 50562.96875 +err_fin 45572.140625 +sparsity check 0.2999999906335558 +time 136.92 +18 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 195582.46875 +err_fin 161440.25 +sparsity check 0.2999999672174454 +time 75.98 +18 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 81771.046875 +err_fin 69608.484375 +sparsity check 0.2999997138977051 +time 1.36 +18 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68762.984375 +err_fin 61125.35546875 +sparsity check 0.2999997138977051 +time 1.33 +18 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 13785.78125 +err_fin 8140.5859375 +sparsity check 0.2999999672174454 +time 68.90 +18 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1021166.6875 +err_fin 808623.375 +sparsity check 0.2999999906335558 +time 138.87 +18 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 995050.1875 +err_fin 783959.0 +sparsity check 0.2999999906335558 +time 139.23 +18 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 54236.234375 +err_fin 49195.7890625 +sparsity check 0.2999999906335558 +time 136.76 +19 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 236991.8125 +err_fin 198120.375 +sparsity check 0.2999999672174454 +time 75.99 +19 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 95409.734375 +err_fin 83244.34375 +sparsity check 0.2999997138977051 +time 1.37 +19 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 81631.5546875 +err_fin 74011.890625 +sparsity check 0.2999997138977051 +time 1.34 +19 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 12312.45703125 +err_fin 7862.009765625 +sparsity check 0.2999999672174454 +time 68.86 +19 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1110722.75 +err_fin 889099.75 +sparsity check 0.2999999906335558 +time 138.99 +19 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1082564.125 +err_fin 863356.6875 +sparsity check 0.2999999906335558 +time 139.31 +19 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 53912.125 +err_fin 49698.0625 +sparsity check 0.2999999906335558 +time 136.83 +20 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 206513.515625 +err_fin 174613.0 +sparsity check 0.2999999672174454 +time 75.96 +20 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 82910.4140625 +err_fin 72427.0859375 +sparsity check 0.2999997138977051 +time 1.40 +20 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 66453.0859375 +err_fin 60729.4765625 +sparsity check 0.2999997138977051 +time 1.33 +20 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8775.353515625 +err_fin 5111.00732421875 +sparsity check 0.2999999672174454 +time 68.86 +20 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1194548.25 +err_fin 961933.375 +sparsity check 0.2999999906335558 +time 138.83 +20 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1165982.375 +err_fin 935335.5 +sparsity check 0.2999999906335558 +time 139.24 +20 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 54315.2578125 +err_fin 50397.1640625 +sparsity check 0.2999999906335558 +time 136.77 +21 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 121441.5390625 +err_fin 102211.625 +sparsity check 0.2999999672174454 +time 76.00 +21 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 47674.41796875 +err_fin 40964.671875 +sparsity check 0.2999997138977051 +time 1.36 +21 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 56955.3671875 +err_fin 51291.2890625 +sparsity check 0.2999997138977051 +time 1.35 +21 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 12005.697265625 +err_fin 7319.1103515625 +sparsity check 0.2999999672174454 +time 68.89 +21 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1243470.0 +err_fin 1001291.0 +sparsity check 0.2999999906335558 +time 138.83 +21 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1219399.375 +err_fin 978155.125 +sparsity check 0.2999999906335558 +time 139.20 +21 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 57454.31640625 +err_fin 53498.1171875 +sparsity check 0.2999999906335558 +time 136.75 +22 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 116548.296875 +err_fin 98789.25 +sparsity check 0.2999999672174454 +time 75.97 +22 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 53585.62890625 +err_fin 46771.4140625 +sparsity check 0.2999997138977051 +time 1.37 +22 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 64866.82421875 +err_fin 59049.6640625 +sparsity check 0.2999997138977051 +time 1.34 +22 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8473.02734375 +err_fin 5000.4384765625 +sparsity check 0.2999999672174454 +time 68.90 +22 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1304645.75 +err_fin 1051347.5 +sparsity check 0.2999999906335558 +time 138.87 +22 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1282196.125 +err_fin 1029592.375 +sparsity check 0.2999999906335558 +time 139.18 +22 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 58738.921875 +err_fin 54899.90625 +sparsity check 0.2999999906335558 +time 136.71 +23 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 177000.34375 +err_fin 148722.3125 +sparsity check 0.2999999672174454 +time 75.96 +23 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 83508.140625 +err_fin 72323.1171875 +sparsity check 0.2999997138977051 +time 1.34 +23 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68578.0234375 +err_fin 62196.5703125 +sparsity check 0.2999997138977051 +time 1.33 +23 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11660.74609375 +err_fin 6889.0078125 +sparsity check 0.2999999672174454 +time 68.90 +23 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1337517.5 +err_fin 1078256.25 +sparsity check 0.2999999906335558 +time 138.82 +23 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1317734.75 +err_fin 1058014.5 +sparsity check 0.2999999906335558 +time 139.10 +23 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 60790.53515625 +err_fin 56594.65234375 +sparsity check 0.2999999906335558 +time 136.74 +24 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 156468.140625 +err_fin 130504.625 +sparsity check 0.2999999672174454 +time 75.99 +24 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 73629.4375 +err_fin 63010.8984375 +sparsity check 0.2999997138977051 +time 1.38 +24 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 51387.546875 +err_fin 45452.140625 +sparsity check 0.2999997138977051 +time 1.33 +24 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 10108.07421875 +err_fin 6297.9501953125 +sparsity check 0.2999999672174454 +time 68.88 +24 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1303813.0 +err_fin 1052222.75 +sparsity check 0.2999999906335558 +time 138.82 +24 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1292389.875 +err_fin 1040395.8125 +sparsity check 0.2999999906335558 +time 139.15 +24 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 61466.703125 +err_fin 57394.7421875 +sparsity check 0.2999999906335558 +time 136.67 +25 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 178466.796875 +err_fin 148865.34375 +sparsity check 0.2999999672174454 +time 75.99 +25 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 83107.234375 +err_fin 70354.984375 +sparsity check 0.2999997138977051 +time 1.35 +25 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 53946.8515625 +err_fin 47403.734375 +sparsity check 0.2999997138977051 +time 1.32 +25 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 28022.23046875 +err_fin 17113.30078125 +sparsity check 0.2999999672174454 +time 68.86 +25 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1236659.25 +err_fin 973750.0625 +sparsity check 0.2999999906335558 +time 138.74 +25 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1234294.375 +err_fin 968615.1875 +sparsity check 0.2999999906335558 +time 139.09 +25 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 86355.53125 +err_fin 79051.78125 +sparsity check 0.2999999906335558 +time 136.91 +26 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 272898.78125 +err_fin 227520.6875 +sparsity check 0.2999999672174454 +time 75.93 +26 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 120087.921875 +err_fin 103409.90625 +sparsity check 0.2999997138977051 +time 1.37 +26 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 81232.515625 +err_fin 72057.2734375 +sparsity check 0.2999997138977051 +time 1.34 +26 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 26970.46484375 +err_fin 17077.2265625 +sparsity check 0.2999999672174454 +time 68.85 +26 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1413183.75 +err_fin 1126928.5 +sparsity check 0.2999999906335558 +time 138.83 +26 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1405452.5 +err_fin 1116377.125 +sparsity check 0.2999999906335558 +time 139.12 +26 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 85323.671875 +err_fin 79784.0625 +sparsity check 0.2999999906335558 +time 136.91 +27 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 115558.09375 +err_fin 97767.78125 +sparsity check 0.2999999672174454 +time 75.97 +27 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 56699.1953125 +err_fin 49213.171875 +sparsity check 0.2999997138977051 +time 1.35 +27 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68962.90625 +err_fin 63054.6484375 +sparsity check 0.2999997138977051 +time 1.34 +27 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11683.6630859375 +err_fin 6646.47314453125 +sparsity check 0.2999999672174454 +time 68.86 +27 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1501156.25 +err_fin 1203228.25 +sparsity check 0.2999999906335558 +time 138.84 +27 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1491076.625 +err_fin 1190134.875 +sparsity check 0.2999999906335558 +time 139.08 +27 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 90162.8984375 +err_fin 84577.515625 +sparsity check 0.2999999906335558 +time 136.92 +28 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 218782.3125 +err_fin 185109.6875 +sparsity check 0.2999999672174454 +time 75.96 +28 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 105500.078125 +err_fin 92746.3125 +sparsity check 0.2999997138977051 +time 1.34 +28 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 105374.5 +err_fin 96035.859375 +sparsity check 0.2999997138977051 +time 1.33 +28 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 19544.38671875 +err_fin 12621.37890625 +sparsity check 0.2999999672174454 +time 68.87 +28 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1587041.75 +err_fin 1279147.75 +sparsity check 0.2999999906335558 +time 138.87 +28 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1575474.875 +err_fin 1265067.375 +sparsity check 0.2999999906335558 +time 139.14 +28 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 95820.46875 +err_fin 90217.296875 +sparsity check 0.2999999906335558 +time 136.90 +29 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 213201.5 +err_fin 181015.28125 +sparsity check 0.2999999672174454 +time 76.00 +29 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 95993.078125 +err_fin 84196.1015625 +sparsity check 0.2999997138977051 +time 1.36 +29 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 114659.6875 +err_fin 104625.4375 +sparsity check 0.2999997138977051 +time 1.33 +29 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 22542.279296875 +err_fin 15099.7080078125 +sparsity check 0.2999999672174454 +time 68.81 +29 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1681974.875 +err_fin 1361460.25 +sparsity check 0.2999999906335558 +time 138.80 +29 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1667598.75 +err_fin 1344047.0 +sparsity check 0.2999999906335558 +time 139.12 +29 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 101028.2734375 +err_fin 95562.359375 +sparsity check 0.2999999906335558 +time 136.77 +30 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 103397.0 +err_fin 88246.9765625 +sparsity check 0.2999999672174454 +time 76.03 +30 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 40666.4921875 +err_fin 35697.8671875 +sparsity check 0.2999997138977051 +time 1.36 +30 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 104980.0078125 +err_fin 96601.2421875 +sparsity check 0.2999997138977051 +time 1.33 +30 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 13561.2197265625 +err_fin 8894.8076171875 +sparsity check 0.2999999672174454 +time 68.89 +30 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1786091.5 +err_fin 1452231.25 +sparsity check 0.2999999906335558 +time 138.83 +30 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1764579.25 +err_fin 1428528.0 +sparsity check 0.2999999906335558 +time 139.25 +30 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 104187.3828125 +err_fin 98655.8359375 +sparsity check 0.2999999906335558 +time 136.77 +31 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 221379.375 +err_fin 188682.6875 +sparsity check 0.2999999672174454 +time 75.98 +31 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 97616.9296875 +err_fin 85803.203125 +sparsity check 0.2999997138977051 +time 1.36 +31 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 113328.21875 +err_fin 101994.34375 +sparsity check 0.2999997138977051 +time 1.33 +31 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 25169.37890625 +err_fin 17087.880859375 +sparsity check 0.2999999672174454 +time 68.82 +31 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1866266.75 +err_fin 1528834.625 +sparsity check 0.2999999906335558 +time 138.85 +31 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1838902.5 +err_fin 1500553.625 +sparsity check 0.2999999906335558 +time 139.15 +31 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 105678.5 +err_fin 100301.6015625 +sparsity check 0.2999999906335558 +time 136.69 +32 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 262803.875 +err_fin 224880.0 +sparsity check 0.2999999672174454 +time 75.97 +32 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 113062.328125 +err_fin 100499.796875 +sparsity check 0.2999997138977051 +time 1.35 +32 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 100919.734375 +err_fin 92579.75 +sparsity check 0.2999997138977051 +time 1.34 +32 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15704.1376953125 +err_fin 10146.11328125 +sparsity check 0.2999999672174454 +time 68.87 +32 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1915143.25 +err_fin 1565343.0 +sparsity check 0.2999999906335558 +time 138.87 +32 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1892121.5 +err_fin 1540707.0 +sparsity check 0.2999999906335558 +time 139.24 +32 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 112154.046875 +err_fin 106190.984375 +sparsity check 0.2999999906335558 +time 136.78 +33 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 380957.1875 +err_fin 323374.1875 +sparsity check 0.2999999672174454 +time 76.04 +33 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152019.015625 +err_fin 133161.09375 +sparsity check 0.2999997138977051 +time 1.35 +33 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 112660.0546875 +err_fin 101788.078125 +sparsity check 0.2999997138977051 +time 1.34 +33 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 32713.7578125 +err_fin 20666.58203125 +sparsity check 0.2999999672174454 +time 68.83 +33 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1935303.0 +err_fin 1560679.875 +sparsity check 0.2999999906335558 +time 138.96 +33 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1917896.0 +err_fin 1540437.5 +sparsity check 0.2999999906335558 +time 139.28 +33 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 128665.703125 +err_fin 121072.546875 +sparsity check 0.2999999906335558 +time 136.83 +34 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 122775.3359375 +err_fin 103060.7265625 +sparsity check 0.2999999672174454 +time 75.95 +34 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 53525.4609375 +err_fin 45110.37109375 +sparsity check 0.2999997138977051 +time 1.35 +34 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 67481.703125 +err_fin 59663.5234375 +sparsity check 0.2999997138977051 +time 1.35 +34 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 21456.5234375 +err_fin 13878.259765625 +sparsity check 0.2999999672174454 +time 68.86 +34 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2000206.25 +err_fin 1622827.0 +sparsity check 0.2999999906335558 +time 138.82 +34 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1980603.25 +err_fin 1600789.125 +sparsity check 0.2999999906335558 +time 139.12 +34 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 140142.703125 +err_fin 132631.9375 +sparsity check 0.2999999906335558 +time 136.79 +35 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 214924.96875 +err_fin 182660.640625 +sparsity check 0.2999999672174454 +time 76.11 +35 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 84956.5703125 +err_fin 73921.9296875 +sparsity check 0.2999997138977051 +time 1.35 +35 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 91451.0703125 +err_fin 82992.3828125 +sparsity check 0.2999997138977051 +time 1.34 +35 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 25634.619140625 +err_fin 15644.27734375 +sparsity check 0.2999999672174454 +time 68.99 +35 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2124431.5 +err_fin 1726037.875 +sparsity check 0.2999999906335558 +time 139.11 +35 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2101923.75 +err_fin 1701514.0 +sparsity check 0.2999999906335558 +time 139.44 +35 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 150777.953125 +err_fin 142886.890625 +sparsity check 0.2999999906335558 +time 137.07 +36 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 205836.03125 +err_fin 175031.5625 +sparsity check 0.2999999672174454 +time 76.14 +36 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 84598.546875 +err_fin 73822.7421875 +sparsity check 0.2999997138977051 +time 1.37 +36 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 88803.203125 +err_fin 80859.484375 +sparsity check 0.2999997138977051 +time 1.34 +36 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 18330.46484375 +err_fin 11179.58984375 +sparsity check 0.2999999672174454 +time 68.94 +36 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2172691.0 +err_fin 1762100.875 +sparsity check 0.2999999906335558 +time 139.11 +36 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2152439.5 +err_fin 1739655.5 +sparsity check 0.2999999906335558 +time 139.44 +36 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 168783.984375 +err_fin 159445.0 +sparsity check 0.2999999906335558 +time 136.99 +37 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 341693.96875 +err_fin 289524.4375 +sparsity check 0.2999999672174454 +time 76.10 +37 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137424.4375 +err_fin 120404.546875 +sparsity check 0.2999997138977051 +time 1.36 +37 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 113853.453125 +err_fin 103941.421875 +sparsity check 0.2999997138977051 +time 1.33 +37 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30907.140625 +err_fin 17678.953125 +sparsity check 0.2999999672174454 +time 68.96 +37 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2275916.5 +err_fin 1835961.0 +sparsity check 0.2999999906335558 +time 139.10 +37 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2257321.25 +err_fin 1813333.0 +sparsity check 0.2999999906335558 +time 139.47 +37 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 185594.75 +err_fin 175083.84375 +sparsity check 0.2999999906335558 +time 137.00 +38 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 383625.125 +err_fin 322323.53125 +sparsity check 0.2999999672174454 +time 76.01 +38 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 151267.09375 +err_fin 130291.4375 +sparsity check 0.2999997138977051 +time 1.34 +38 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 120968.46875 +err_fin 107977.6875 +sparsity check 0.2999997138977051 +time 1.34 +38 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 49039.015625 +err_fin 28431.6796875 +sparsity check 0.2999999672174454 +time 68.88 +38 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2348146.5 +err_fin 1889757.5 +sparsity check 0.2999999906335558 +time 138.87 +38 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2332156.0 +err_fin 1869847.375 +sparsity check 0.2999999906335558 +time 138.94 +38 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 209499.734375 +err_fin 197284.65625 +sparsity check 0.2999999906335558 +time 136.75 +39 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 472518.0 +err_fin 396747.0 +sparsity check 0.2999999672174454 +time 75.95 +39 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 196093.890625 +err_fin 170486.25 +sparsity check 0.2999997138977051 +time 1.35 +39 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 131558.921875 +err_fin 117586.1328125 +sparsity check 0.2999997138977051 +time 1.34 +39 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 58529.93359375 +err_fin 33396.3984375 +sparsity check 0.2999999672174454 +time 68.86 +39 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2372547.5 +err_fin 1890493.375 +sparsity check 0.2999999906335558 +time 138.83 +39 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2361087.75 +err_fin 1874025.75 +sparsity check 0.2999999906335558 +time 139.10 +39 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 255438.25 +err_fin 237492.125 +sparsity check 0.2999999906335558 +time 136.81 +40 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 439511.4375 +err_fin 365144.625 +sparsity check 0.2999999672174454 +time 75.98 +40 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 170366.28125 +err_fin 147419.84375 +sparsity check 0.2999997138977051 +time 1.34 +40 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 150494.75 +err_fin 131893.71875 +sparsity check 0.2999997138977051 +time 1.34 +40 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 83030.8125 +err_fin 54382.9609375 +sparsity check 0.2999999672174454 +time 68.87 +40 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2463948.5 +err_fin 1940839.25 +sparsity check 0.2999999906335558 +time 138.85 +40 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2448904.25 +err_fin 1920500.5 +sparsity check 0.2999999906335558 +time 139.23 +40 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 289980.5 +err_fin 270139.5625 +sparsity check 0.2999999906335558 +time 136.79 +41 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 354734.1875 +err_fin 291964.53125 +sparsity check 0.2999999672174454 +time 76.02 +41 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 134578.46875 +err_fin 115245.9453125 +sparsity check 0.2999997138977051 +time 1.34 +41 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 130844.421875 +err_fin 115337.03125 +sparsity check 0.2999997138977051 +time 1.34 +41 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 88723.109375 +err_fin 51516.10546875 +sparsity check 0.2999999672174454 +time 68.86 +41 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2564022.0 +err_fin 1986360.125 +sparsity check 0.2999999906335558 +time 138.83 +41 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2507331.75 +err_fin 1930532.125 +sparsity check 0.2999999906335558 +time 139.11 +41 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 338828.03125 +err_fin 312503.9375 +sparsity check 0.2999999906335558 +time 136.91 +42 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 411178.53125 +err_fin 335910.5625 +sparsity check 0.2999999672174454 +time 75.96 +42 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 146400.75 +err_fin 125877.1875 +sparsity check 0.2999997138977051 +time 1.35 +42 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 161474.5625 +err_fin 143672.21875 +sparsity check 0.2999997138977051 +time 1.33 +42 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 82672.375 +err_fin 50431.01171875 +sparsity check 0.2999999672174454 +time 68.84 +42 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2767744.0 +err_fin 2130180.75 +sparsity check 0.2999999906335558 +time 138.80 +42 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2647581.5 +err_fin 2025464.0 +sparsity check 0.2999999906335558 +time 139.09 +42 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 366250.5 +err_fin 337305.78125 +sparsity check 0.2999999906335558 +time 136.68 +43 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 280567.90625 +err_fin 228823.125 +sparsity check 0.2999999672174454 +time 75.96 +43 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 120347.125 +err_fin 102759.8125 +sparsity check 0.2999997138977051 +time 1.35 +43 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 126751.6484375 +err_fin 109898.375 +sparsity check 0.2999997138977051 +time 1.33 +43 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 58906.57421875 +err_fin 38343.58203125 +sparsity check 0.2999999672174454 +time 68.79 +43 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2876848.5 +err_fin 2203414.5 +sparsity check 0.2999999906335558 +time 138.79 +43 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2726980.0 +err_fin 2075451.25 +sparsity check 0.2999999906335558 +time 139.08 +43 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 401723.9375 +err_fin 369312.71875 +sparsity check 0.2999999906335558 +time 136.86 +44 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 508804.59375 +err_fin 415737.625 +sparsity check 0.2999999672174454 +time 75.98 +44 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 150069.484375 +err_fin 130828.453125 +sparsity check 0.2999997138977051 +time 1.35 +44 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 193368.78125 +err_fin 174290.75 +sparsity check 0.2999997138977051 +time 1.33 +44 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 145043.71875 +err_fin 95371.828125 +sparsity check 0.2999999672174454 +time 68.91 +44 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3146891.5 +err_fin 2387051.0 +sparsity check 0.2999999906335558 +time 138.84 +44 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2884849.5 +err_fin 2171657.5 +sparsity check 0.2999999906335558 +time 139.12 +44 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 464940.0625 +err_fin 420746.96875 +sparsity check 0.2999999906335558 +time 136.88 +45 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 317623.25 +err_fin 255841.640625 +sparsity check 0.2999999672174454 +time 75.96 +45 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 126769.59375 +err_fin 108456.75 +sparsity check 0.2999997138977051 +time 1.36 +45 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 187572.5625 +err_fin 168927.65625 +sparsity check 0.2999997138977051 +time 1.34 +45 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 47674.6171875 +err_fin 32155.158203125 +sparsity check 0.2999999672174454 +time 68.86 +45 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3405428.5 +err_fin 2587359.5 +sparsity check 0.2999999906335558 +time 138.85 +45 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3085100.25 +err_fin 2325033.5 +sparsity check 0.2999999906335558 +time 138.80 +45 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 475345.46875 +err_fin 431596.9375 +sparsity check 0.2999999906335558 +time 136.38 +46 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 263395.71875 +err_fin 213005.28125 +sparsity check 0.2999999672174454 +time 75.96 +46 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 99574.15625 +err_fin 86753.2578125 +sparsity check 0.2999997138977051 +time 1.35 +46 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 162039.59375 +err_fin 146753.984375 +sparsity check 0.2999997138977051 +time 1.34 +46 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 63940.453125 +err_fin 44551.078125 +sparsity check 0.2999999672174454 +time 68.87 +46 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3655060.0 +err_fin 2791309.25 +sparsity check 0.2999999906335558 +time 138.78 +46 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3261432.0 +err_fin 2472352.5 +sparsity check 0.2999999906335558 +time 139.14 +46 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 475824.5 +err_fin 435338.125 +sparsity check 0.2999999906335558 +time 136.65 +47 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 364249.84375 +err_fin 295973.9375 +sparsity check 0.2999999672174454 +time 75.92 +47 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 146091.640625 +err_fin 126103.03125 +sparsity check 0.2999997138977051 +time 1.35 +47 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 160440.15625 +err_fin 143679.96875 +sparsity check 0.2999997138977051 +time 1.34 +47 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 98694.78125 +err_fin 55647.3125 +sparsity check 0.2999999672174454 +time 68.85 +47 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3775736.75 +err_fin 2854559.0 +sparsity check 0.2999999906335558 +time 138.80 +47 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3327351.25 +err_fin 2494366.0 +sparsity check 0.2999999906335558 +time 139.14 +47 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 534005.25 +err_fin 482482.3125 +sparsity check 0.2999999906335558 +time 136.75 +48 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 172792.40625 +err_fin 139728.265625 +sparsity check 0.2999999672174454 +time 75.99 +48 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57409.11328125 +err_fin 49361.9609375 +sparsity check 0.2999997138977051 +time 1.37 +48 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152449.59375 +err_fin 137785.90625 +sparsity check 0.2999997138977051 +time 1.33 +48 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 61970.984375 +err_fin 41327.55078125 +sparsity check 0.2999999672174454 +time 68.82 +48 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3948376.5 +err_fin 2967533.5 +sparsity check 0.2999999906335558 +time 138.81 +48 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3463526.5 +err_fin 2581677.0 +sparsity check 0.2999999906335558 +time 139.12 +48 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 531346.875 +err_fin 482351.875 +sparsity check 0.2999999906335558 +time 136.92 +49 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 92701.71875 +err_fin 74674.328125 +sparsity check 0.2999999672174454 +time 75.92 +49 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 39942.46875 +err_fin 34114.578125 +sparsity check 0.2999997138977051 +time 1.34 +49 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 124484.96875 +err_fin 110166.25 +sparsity check 0.2999997138977051 +time 1.33 +49 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 23617.455078125 +err_fin 14932.462890625 +sparsity check 0.2999999672174454 +time 68.82 +49 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4097844.75 +err_fin 3080826.75 +sparsity check 0.2999999906335558 +time 138.81 +49 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3592342.5 +err_fin 2677633.5 +sparsity check 0.2999999906335558 +time 139.21 +49 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 533097.8125 +err_fin 486073.15625 +sparsity check 0.2999999906335558 +time 136.77 +50 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 154342.25 +err_fin 124155.21875 +sparsity check 0.2999999672174454 +time 75.98 +50 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52400.37890625 +err_fin 45099.2734375 +sparsity check 0.2999997138977051 +time 1.33 +50 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 159195.15625 +err_fin 142175.875 +sparsity check 0.2999997138977051 +time 1.33 +50 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 51435.8125 +err_fin 35645.078125 +sparsity check 0.2999999672174454 +time 68.90 +50 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4253453.0 +err_fin 3199545.0 +sparsity check 0.2999999906335558 +time 138.84 +50 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3711681.5 +err_fin 2771113.5 +sparsity check 0.2999999906335558 +time 139.13 +50 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 534982.4375 +err_fin 488952.84375 +sparsity check 0.2999999906335558 +time 136.90 +51 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 203378.9375 +err_fin 163606.921875 +sparsity check 0.2999999672174454 +time 76.02 +51 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 75364.7109375 +err_fin 65284.08203125 +sparsity check 0.2999997138977051 +time 1.36 +51 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 164796.984375 +err_fin 149017.71875 +sparsity check 0.2999997138977051 +time 1.34 +51 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 46578.84765625 +err_fin 30409.25 +sparsity check 0.2999999672174454 +time 68.87 +51 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4372869.5 +err_fin 3297004.75 +sparsity check 0.2999999906335558 +time 138.84 +51 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3827711.5 +err_fin 2864346.0 +sparsity check 0.2999999906335558 +time 139.24 +51 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 538380.125 +err_fin 493029.0 +sparsity check 0.2999999906335558 +time 136.94 +52 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 343205.59375 +err_fin 277933.5 +sparsity check 0.2999999672174454 +time 75.94 +52 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 97406.1171875 +err_fin 85302.9453125 +sparsity check 0.2999997138977051 +time 1.35 +52 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 188485.796875 +err_fin 169885.046875 +sparsity check 0.2999997138977051 +time 1.34 +52 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 87558.890625 +err_fin 55424.01953125 +sparsity check 0.2999999672174454 +time 68.87 +52 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4470279.5 +err_fin 3383197.5 +sparsity check 0.2999999906335558 +time 138.96 +52 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3904438.5 +err_fin 2933288.5 +sparsity check 0.2999999906335558 +time 139.29 +52 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 545559.8125 +err_fin 500625.75 +sparsity check 0.2999999906335558 +time 136.99 +53 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 98386.859375 +err_fin 79794.640625 +sparsity check 0.2999999672174454 +time 75.96 +53 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 30940.599609375 +err_fin 26762.8828125 +sparsity check 0.2999997138977051 +time 1.35 +53 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 157410.75 +err_fin 141510.515625 +sparsity check 0.2999997138977051 +time 1.35 +53 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 35727.7890625 +err_fin 23910.728515625 +sparsity check 0.2999999672174454 +time 68.88 +53 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4587562.5 +err_fin 3464021.0 +sparsity check 0.2999999906335558 +time 138.88 +53 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4015500.5 +err_fin 3009185.75 +sparsity check 0.2999999906335558 +time 138.90 +53 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 551366.0 +err_fin 506246.21875 +sparsity check 0.2999999906335558 +time 136.82 +54 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 115892.796875 +err_fin 93850.4921875 +sparsity check 0.2999999672174454 +time 76.00 +54 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 36039.2734375 +err_fin 30834.34765625 +sparsity check 0.2999997138977051 +time 1.34 +54 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 162464.390625 +err_fin 146485.0625 +sparsity check 0.2999997138977051 +time 1.33 +54 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 44281.5078125 +err_fin 28517.87890625 +sparsity check 0.2999999672174454 +time 68.87 +54 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4700598.0 +err_fin 3554828.5 +sparsity check 0.2999999906335558 +time 138.84 +54 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4124973.0 +err_fin 3096894.5 +sparsity check 0.2999999906335558 +time 139.31 +54 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 551188.875 +err_fin 507271.125 +sparsity check 0.2999999906335558 +time 136.79 +55 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 192386.921875 +err_fin 156298.625 +sparsity check 0.2999999672174454 +time 76.08 +55 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68489.171875 +err_fin 59610.7265625 +sparsity check 0.2999997138977051 +time 1.35 +55 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 213937.28125 +err_fin 192016.8125 +sparsity check 0.2999997138977051 +time 1.33 +55 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 85003.484375 +err_fin 55887.3359375 +sparsity check 0.2999999672174454 +time 68.81 +55 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4787985.5 +err_fin 3627535.75 +sparsity check 0.2999999906335558 +time 138.80 +55 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4230457.0 +err_fin 3182817.75 +sparsity check 0.2999999906335558 +time 139.23 +55 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 562564.5625 +err_fin 519353.1875 +sparsity check 0.2999999906335558 +time 136.79 +56 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 145041.234375 +err_fin 117943.0859375 +sparsity check 0.2999999672174454 +time 76.03 +56 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 54334.1484375 +err_fin 46924.34375 +sparsity check 0.2999997138977051 +time 1.36 +56 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 150658.5625 +err_fin 135525.34375 +sparsity check 0.2999997138977051 +time 1.35 +56 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 42762.5 +err_fin 26602.42578125 +sparsity check 0.2999999672174454 +time 68.92 +56 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4849524.0 +err_fin 3669309.5 +sparsity check 0.2999999906335558 +time 138.91 +56 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4295199.0 +err_fin 3226766.5 +sparsity check 0.2999999906335558 +time 139.18 +56 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 568823.25 +err_fin 525306.875 +sparsity check 0.2999999906335558 +time 136.89 +57 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 126576.09375 +err_fin 102643.7109375 +sparsity check 0.2999999672174454 +time 75.94 +57 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 42347.52734375 +err_fin 37046.171875 +sparsity check 0.2999997138977051 +time 1.34 +57 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 191894.734375 +err_fin 173407.640625 +sparsity check 0.2999997138977051 +time 1.34 +57 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 45342.23046875 +err_fin 30498.8671875 +sparsity check 0.2999999672174454 +time 68.85 +57 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5016962.0 +err_fin 3800603.0 +sparsity check 0.2999999906335558 +time 138.85 +57 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4454324.5 +err_fin 3350025.5 +sparsity check 0.2999999906335558 +time 139.16 +57 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 579653.125 +err_fin 536391.1875 +sparsity check 0.2999999906335558 +time 136.78 +58 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66646.84375 +err_fin 54362.26171875 +sparsity check 0.2999999672174454 +time 75.95 +58 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 21354.5625 +err_fin 18121.306640625 +sparsity check 0.2999997138977051 +time 1.36 +58 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137431.15625 +err_fin 122708.6875 +sparsity check 0.2999997138977051 +time 1.34 +58 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 28380.96875 +err_fin 18636.51171875 +sparsity check 0.2999999672174454 +time 68.83 +58 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5121918.5 +err_fin 3885969.75 +sparsity check 0.2999999906335558 +time 138.87 +58 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4555366.0 +err_fin 3433220.0 +sparsity check 0.2999999906335558 +time 139.20 +58 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 582902.4375 +err_fin 540428.125 +sparsity check 0.2999999906335558 +time 137.03 +59 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 88575.640625 +err_fin 71788.8125 +sparsity check 0.2999999672174454 +time 75.94 +59 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 26823.66015625 +err_fin 23156.845703125 +sparsity check 0.2999997138977051 +time 1.35 +59 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152805.875 +err_fin 135758.5625 +sparsity check 0.2999997138977051 +time 1.34 +59 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 27197.44140625 +err_fin 17258.873046875 +sparsity check 0.2999999672174454 +time 68.86 +59 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5239635.0 +err_fin 3983008.75 +sparsity check 0.2999999906335558 +time 138.78 +59 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4665852.0 +err_fin 3524699.75 +sparsity check 0.2999999906335558 +time 139.09 +59 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 582928.3125 +err_fin 541999.0625 +sparsity check 0.2999999906335558 +time 136.90 +60 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 12798.548828125 +err_fin 10278.3193359375 +sparsity check 0.2999999672174454 +time 75.93 +60 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 3548.48876953125 +err_fin 2970.5234375 +sparsity check 0.2999997138977051 +time 1.35 +60 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 82546.890625 +err_fin 73781.4765625 +sparsity check 0.2999997138977051 +time 1.34 +60 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15324.7158203125 +err_fin 10076.595703125 +sparsity check 0.2999999672174454 +time 68.79 +60 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5312578.5 +err_fin 4051645.75 +sparsity check 0.2999999906335558 +time 138.96 +60 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4749171.0 +err_fin 3602157.5 +sparsity check 0.2999999906335558 +time 139.38 +60 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 578575.6875 +err_fin 539150.25 +sparsity check 0.2999999906335558 +time 136.96 +61 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 54320.1015625 +err_fin 44391.296875 +sparsity check 0.2999999672174454 +time 75.97 +61 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 16122.8203125 +err_fin 13871.373046875 +sparsity check 0.2999997138977051 +time 1.34 +61 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 140027.9375 +err_fin 126466.546875 +sparsity check 0.2999997138977051 +time 1.33 +61 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 24995.88671875 +err_fin 16653.05078125 +sparsity check 0.2999999672174454 +time 68.86 +61 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5411788.0 +err_fin 4130672.5 +sparsity check 0.2999999906335558 +time 139.06 +61 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4861670.0 +err_fin 3689423.0 +sparsity check 0.2999999906335558 +time 139.50 +61 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 580966.125 +err_fin 542406.75 +sparsity check 0.2999999906335558 +time 136.93 +62 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 57740.171875 +err_fin 47270.33203125 +sparsity check 0.2999999672174454 +time 76.05 +62 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 19181.212890625 +err_fin 16542.439453125 +sparsity check 0.2999997138977051 +time 1.34 +62 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 151797.3125 +err_fin 135003.84375 +sparsity check 0.2999997138977051 +time 1.35 +62 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 20518.494140625 +err_fin 12232.9052734375 +sparsity check 0.2999999672174454 +time 68.94 +62 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5527104.5 +err_fin 4233280.5 +sparsity check 0.2999999906335558 +time 139.10 +62 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4990665.0 +err_fin 3798818.5 +sparsity check 0.2999999906335558 +time 139.41 +62 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 587051.875 +err_fin 548654.25 +sparsity check 0.2999999906335558 +time 137.18 +63 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 53336.2421875 +err_fin 43936.7578125 +sparsity check 0.2999999672174454 +time 76.09 +63 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 14450.50390625 +err_fin 12411.6513671875 +sparsity check 0.2999997138977051 +time 1.36 +63 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 119009.359375 +err_fin 104587.875 +sparsity check 0.2999997138977051 +time 1.34 +63 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 18648.859375 +err_fin 12419.607421875 +sparsity check 0.2999999672174454 +time 68.98 +63 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5644198.0 +err_fin 4332246.0 +sparsity check 0.2999999906335558 +time 139.17 +63 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5100512.0 +err_fin 3895234.75 +sparsity check 0.2999999906335558 +time 139.48 +63 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 593314.5 +err_fin 555283.4375 +sparsity check 0.2999999906335558 +time 137.05 +64 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 102718.59375 +err_fin 84217.453125 +sparsity check 0.2999999672174454 +time 76.04 +64 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 33677.140625 +err_fin 28981.064453125 +sparsity check 0.2999997138977051 +time 1.36 +64 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 171384.203125 +err_fin 154471.34375 +sparsity check 0.2999997138977051 +time 1.35 +64 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 33770.703125 +err_fin 21257.568359375 +sparsity check 0.2999999672174454 +time 68.89 +64 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5753349.0 +err_fin 4422938.0 +sparsity check 0.2999999906335558 +time 138.94 +64 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5216544.0 +err_fin 3991321.0 +sparsity check 0.2999999906335558 +time 139.22 +64 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 602094.5 +err_fin 563713.875 +sparsity check 0.2999999906335558 +time 136.81 +65 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 26355.453125 +err_fin 21620.01953125 +sparsity check 0.2999999672174454 +time 75.99 +65 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 6515.88671875 +err_fin 5465.53125 +sparsity check 0.2999997138977051 +time 1.36 +65 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 94989.2734375 +err_fin 83817.53125 +sparsity check 0.2999997138977051 +time 1.33 +65 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14039.490234375 +err_fin 8782.720703125 +sparsity check 0.2999999672174454 +time 68.94 +65 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5849988.0 +err_fin 4509393.0 +sparsity check 0.2999999906335558 +time 138.85 +65 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5327747.0 +err_fin 4088452.5 +sparsity check 0.2999999906335558 +time 139.20 +65 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 611602.875 +err_fin 573357.125 +sparsity check 0.2999999906335558 +time 136.75 +66 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 44273.30859375 +err_fin 36516.1875 +sparsity check 0.2999999672174454 +time 75.97 +66 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 12960.013671875 +err_fin 10998.443359375 +sparsity check 0.2999997138977051 +time 1.37 +66 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 125497.7734375 +err_fin 112727.65625 +sparsity check 0.2999997138977051 +time 1.33 +66 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30598.025390625 +err_fin 21054.765625 +sparsity check 0.2999999672174454 +time 68.93 +66 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5992271.0 +err_fin 4617096.0 +sparsity check 0.2999999906335558 +time 138.77 +66 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5491715.5 +err_fin 4211823.0 +sparsity check 0.2999999906335558 +time 139.10 +66 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 640144.5625 +err_fin 599823.5 +sparsity check 0.2999999906335558 +time 136.91 +67 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 21747.685546875 +err_fin 17695.83984375 +sparsity check 0.2999999672174454 +time 75.93 +67 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 5541.3466796875 +err_fin 4561.103515625 +sparsity check 0.2999997138977051 +time 1.36 +67 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 51526.2890625 +err_fin 44337.6171875 +sparsity check 0.2999997138977051 +time 1.34 +67 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 9575.8984375 +err_fin 5245.2421875 +sparsity check 0.2999999672174454 +time 68.88 +67 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6040048.0 +err_fin 4659346.0 +sparsity check 0.2999999906335558 +time 138.87 +67 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5572547.5 +err_fin 4280505.5 +sparsity check 0.2999999906335558 +time 139.21 +67 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 641468.875 +err_fin 600853.75 +sparsity check 0.2999999906335558 +time 136.85 +68 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 206237.96875 +err_fin 169046.53125 +sparsity check 0.2999999672174454 +time 75.99 +68 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 74574.7734375 +err_fin 64934.2890625 +sparsity check 0.2999997138977051 +time 1.37 +68 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 244333.90625 +err_fin 221241.34375 +sparsity check 0.2999997138977051 +time 1.33 +68 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30313.84375 +err_fin 20368.40234375 +sparsity check 0.2999999672174454 +time 68.93 +68 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6263252.0 +err_fin 4839256.5 +sparsity check 0.2999999906335558 +time 138.87 +68 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5811772.0 +err_fin 4470888.5 +sparsity check 0.2999999906335558 +time 139.21 +68 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 687192.75 +err_fin 642274.625 +sparsity check 0.2999999906335558 +time 136.80 +69 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 366524.09375 +err_fin 299911.40625 +sparsity check 0.2999999672174454 +time 75.93 +69 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 163258.78125 +err_fin 142651.890625 +sparsity check 0.2999997138977051 +time 1.37 +69 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 325972.21875 +err_fin 290618.65625 +sparsity check 0.2999997138977051 +time 1.32 +69 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 51660.125 +err_fin 33025.34375 +sparsity check 0.2999999672174454 +time 68.86 +69 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6389854.0 +err_fin 4935137.0 +sparsity check 0.2999999906335558 +time 138.90 +69 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5991463.5 +err_fin 4607961.5 +sparsity check 0.2999999906335558 +time 139.25 +69 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 720987.375 +err_fin 674224.75 +sparsity check 0.2999999906335558 +time 136.86 +70 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 200002.9375 +err_fin 163869.09375 +sparsity check 0.2999999672174454 +time 75.96 +70 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 76289.9375 +err_fin 65971.640625 +sparsity check 0.2999997138977051 +time 1.35 +70 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 185622.34375 +err_fin 164882.9375 +sparsity check 0.2999997138977051 +time 1.33 +70 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 38191.6953125 +err_fin 22749.255859375 +sparsity check 0.2999999672174454 +time 68.84 +70 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6531525.0 +err_fin 5040001.0 +sparsity check 0.2999999906335558 +time 138.79 +70 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6153728.0 +err_fin 4732230.0 +sparsity check 0.2999999906335558 +time 139.11 +70 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 769950.0 +err_fin 719316.5625 +sparsity check 0.2999999906335558 +time 136.89 +71 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 284382.1875 +err_fin 232780.40625 +sparsity check 0.2999999672174454 +time 75.93 +71 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 123754.65625 +err_fin 107557.0625 +sparsity check 0.2999997138977051 +time 1.36 +71 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 198829.28125 +err_fin 178026.65625 +sparsity check 0.2999997138977051 +time 1.32 +71 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 50413.4140625 +err_fin 30423.818359375 +sparsity check 0.2999999672174454 +time 68.88 +71 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6734894.0 +err_fin 5184268.0 +sparsity check 0.2999999906335558 +time 138.86 +71 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6357098.0 +err_fin 4879386.0 +sparsity check 0.2999999906335558 +time 139.14 +71 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 811461.6875 +err_fin 756491.875 +sparsity check 0.2999999906335558 +time 136.68 +72 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 408341.9375 +err_fin 332715.6875 +sparsity check 0.2999999672174454 +time 75.90 +72 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 170334.28125 +err_fin 148576.5625 +sparsity check 0.2999997138977051 +time 1.37 +72 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 232841.8125 +err_fin 209264.15625 +sparsity check 0.2999997138977051 +time 1.34 +72 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66256.8125 +err_fin 44045.484375 +sparsity check 0.2999999672174454 +time 68.89 +72 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6898981.0 +err_fin 5295198.0 +sparsity check 0.2999999906335558 +time 138.82 +72 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6553801.0 +err_fin 5015821.0 +sparsity check 0.2999999906335558 +time 139.15 +72 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 872650.0625 +err_fin 812148.125 +sparsity check 0.2999999906335558 +time 136.76 +73 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 378989.0 +err_fin 307307.5 +sparsity check 0.2999999672174454 +time 75.96 +73 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 165181.125 +err_fin 143724.21875 +sparsity check 0.2999997138977051 +time 1.40 +73 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 264383.25 +err_fin 233683.65625 +sparsity check 0.2999997138977051 +time 1.33 +73 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 42146.91015625 +err_fin 28379.98828125 +sparsity check 0.2999999672174454 +time 68.90 +73 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7066649.0 +err_fin 5405444.0 +sparsity check 0.2999999906335558 +time 138.88 +73 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6746659.0 +err_fin 5147261.0 +sparsity check 0.2999999906335558 +time 139.23 +73 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 962808.6875 +err_fin 890883.9375 +sparsity check 0.2999999906335558 +time 136.70 +74 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 341475.875 +err_fin 275513.9375 +sparsity check 0.2999999672174454 +time 75.94 +74 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 141449.484375 +err_fin 120626.625 +sparsity check 0.2999997138977051 +time 1.35 +74 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 202775.5625 +err_fin 176957.40625 +sparsity check 0.2999997138977051 +time 1.34 +74 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 90303.7109375 +err_fin 52464.86328125 +sparsity check 0.2999999672174454 +time 68.85 +74 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7239382.0 +err_fin 5476796.0 +sparsity check 0.2999999906335558 +time 138.81 +74 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6935505.5 +err_fin 5232003.0 +sparsity check 0.2999999906335558 +time 139.13 +74 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1066884.75 +err_fin 980786.3125 +sparsity check 0.2999999906335558 +time 136.83 +75 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 350630.28125 +err_fin 279884.8125 +sparsity check 0.2999999672174454 +time 76.00 +75 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 138135.21875 +err_fin 116699.4453125 +sparsity check 0.2999997138977051 +time 1.35 +75 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 233201.03125 +err_fin 199713.890625 +sparsity check 0.2999997138977051 +time 1.34 +75 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 88134.2421875 +err_fin 51477.3515625 +sparsity check 0.2999999672174454 +time 68.91 +75 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7317242.0 +err_fin 5489530.0 +sparsity check 0.2999999906335558 +time 138.88 +75 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7001968.0 +err_fin 5241092.0 +sparsity check 0.2999999906335558 +time 138.91 +75 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1205966.0 +err_fin 1095852.5 +sparsity check 0.2999999906335558 +time 136.46 +76 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 533007.0 +err_fin 417794.25 +sparsity check 0.2999999672174454 +time 75.93 +76 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 203242.84375 +err_fin 168667.953125 +sparsity check 0.2999997138977051 +time 1.34 +76 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 371379.875 +err_fin 314949.1875 +sparsity check 0.2999997138977051 +time 1.34 +76 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 271257.5 +err_fin 169999.609375 +sparsity check 0.2999999672174454 +time 68.88 +76 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7275406.5 +err_fin 5352736.5 +sparsity check 0.2999999906335558 +time 138.82 +76 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6911096.0 +err_fin 5068489.0 +sparsity check 0.2999999906335558 +time 139.15 +76 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1480580.5 +err_fin 1310576.75 +sparsity check 0.2999999906335558 +time 136.81 +77 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 355251.8125 +err_fin 271370.0625 +sparsity check 0.2999999672174454 +time 75.95 +77 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 144754.375 +err_fin 117778.796875 +sparsity check 0.2999997138977051 +time 1.35 +77 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 251212.9375 +err_fin 208745.71875 +sparsity check 0.2999997138977051 +time 1.34 +77 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 147515.015625 +err_fin 74935.5703125 +sparsity check 0.2999999672174454 +time 68.82 +77 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6806460.0 +err_fin 4879777.0 +sparsity check 0.2999999906335558 +time 138.76 +77 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6467157.0 +err_fin 4620484.5 +sparsity check 0.2999999906335558 +time 138.78 +77 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1742981.375 +err_fin 1490763.75 +sparsity check 0.2999999906335558 +time 136.43 +78 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 319464.34375 +err_fin 234997.234375 +sparsity check 0.2999999672174454 +time 75.94 +78 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 141533.734375 +err_fin 110442.96875 +sparsity check 0.2999997138977051 +time 1.36 +78 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 219353.734375 +err_fin 182995.84375 +sparsity check 0.2999997138977051 +time 1.33 +78 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 89409.109375 +err_fin 47214.2734375 +sparsity check 0.2999999672174454 +time 68.88 +78 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5719522.0 +err_fin 3957060.5 +sparsity check 0.2999999906335558 +time 138.79 +78 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5377522.0 +err_fin 3702593.0 +sparsity check 0.2999999906335558 +time 139.14 +78 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1843521.25 +err_fin 1475283.75 +sparsity check 0.2999999906335558 +time 136.86 +79 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 204026.3125 +err_fin 139487.578125 +sparsity check 0.2999999672174454 +time 75.98 +79 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 111417.5390625 +err_fin 84433.484375 +sparsity check 0.2999997138977051 +time 1.35 +79 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 98769.3203125 +err_fin 77037.796875 +sparsity check 0.2999997138977051 +time 1.35 +79 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 25067.11328125 +err_fin 9912.486328125 +sparsity check 0.2999999672174454 +time 68.86 +79 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3504688.5 +err_fin 2242649.5 +sparsity check 0.2999999906335558 +time 138.88 +79 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3212513.0 +err_fin 2050016.75 +sparsity check 0.2999999906335558 +time 139.19 +79 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1486937.5 +err_fin 1014967.25 +sparsity check 0.2999999906335558 +time 136.74 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0139) +model.layers.0.self_attn.k_proj.weight tensor(0.0296) +model.layers.0.self_attn.v_proj.weight tensor(0.0791) +model.layers.0.self_attn.o_proj.weight tensor(4.0084e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0185) +51398.38483381271 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 4.561863 diff --git a/logs/llama2-70-0.7-fix-mask b/logs/llama2-70-0.7-fix-mask new file mode 100644 index 0000000..b499385 --- /dev/null +++ b/logs/llama2-70-0.7-fix-mask @@ -0,0 +1,4020 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +model.layers.0.self_attn.q_proj.weight torch.Size([8192, 8192]) (8192, 8192) 0.1 +model.layers.0.self_attn.k_proj.weight torch.Size([1024, 8192]) (1024, 8192) 0.2 +model.layers.0.mlp.gate_proj.weight torch.Size([28672, 8192]) (8192, 28672) 0.2 +Ready. +0 self_attn.q_proj +Pruning ... +0.28259551525115967 0.08259549736976624 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1331873331981834 0.9709505944546686 1.0 +err_prefin 146.79672241210938 +err_fin 42.25421905517578 +sparsity check 0.28259551525115967 +time 75.06 +0 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 143.55474853515625 +err_fin 39.225311279296875 +sparsity check 0.29999983310699463 +time 1.33 +0 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 18.346843719482422 +err_fin 9.327608108520508 +sparsity check 0.29999983310699463 +time 1.33 +0 self_attn.o_proj +Pruning ... +0.2628120183944702 0.06281200051307678 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0604359525883797 0.9709505944546686 1.0 +err_prefin 7.877456188201904 +err_fin 0.5982409715652466 +sparsity check 0.2628120183944702 +time 67.91 +0 mlp.gate_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 2311.3427734375 +err_fin 819.235107421875 +sparsity check 0.29999276995658875 +time 137.85 +0 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 2341.20654296875 +err_fin 820.5517578125 +sparsity check 0.29999276995658875 +time 138.12 +0 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 51.23821258544922 +err_fin 22.18744659423828 +sparsity check 0.29999999489103046 +time 135.63 +1 self_attn.q_proj +Pruning ... +0.2640542834997177 0.06405426561832428 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0652610300661904 0.9709505944546686 1.0 +err_prefin 2363.85888671875 +err_fin 517.98046875 +sparsity check 0.2640542834997177 +time 75.10 +1 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 2169.40087890625 +err_fin 510.3336181640625 +sparsity check 0.29999983310699463 +time 1.31 +1 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 127.56815338134766 +err_fin 58.54473114013672 +sparsity check 0.29999983310699463 +time 1.30 +1 self_attn.o_proj +Pruning ... +0.2670059949159622 0.06700597703456879 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0765780080386531 0.9709505944546686 1.0 +err_prefin 146.55723571777344 +err_fin 31.407405853271484 +sparsity check 0.2670059949159622 +time 67.95 +1 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 19178.1171875 +err_fin 6323.96240234375 +sparsity check 0.29999999489103046 +time 137.86 +1 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 21308.6640625 +err_fin 6805.1181640625 +sparsity check 0.29999999489103046 +time 138.07 +1 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 732.9593505859375 +err_fin 566.9931640625 +sparsity check 0.29999999489103046 +time 135.70 +2 self_attn.q_proj +Pruning ... +0.2932608723640442 0.09326085448265076 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1691896024446529 0.9709505944546686 1.0 +err_prefin 7611.6748046875 +err_fin 2654.669921875 +sparsity check 0.2932608723640442 +time 75.02 +2 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 9215.1044921875 +err_fin 3673.41015625 +sparsity check 0.29999983310699463 +time 1.34 +2 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 577.298095703125 +err_fin 340.20489501953125 +sparsity check 0.29999983310699463 +time 1.32 +2 self_attn.o_proj +Pruning ... +0.2901856452226639 0.09018562734127045 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1590172442785198 0.9709505944546686 1.0 +err_prefin 695.6298217773438 +err_fin 261.84814453125 +sparsity check 0.2901856452226639 +time 67.89 +2 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 71745.296875 +err_fin 31993.83984375 +sparsity check 0.29999999489103046 +time 137.81 +2 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 74531.984375 +err_fin 32673.53125 +sparsity check 0.29999999489103046 +time 138.22 +2 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2582.0546875 +err_fin 2160.287109375 +sparsity check 0.29999999489103046 +time 135.61 +3 self_attn.q_proj +Pruning ... +0.29991354048252106 0.09991352260112762 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1906495374166257 0.9709505944546686 1.0 +err_prefin 62766.71875 +err_fin 31050.1875 +sparsity check 0.29991354048252106 +time 74.96 +3 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 40575.09375 +err_fin 23052.455078125 +sparsity check 0.29999983310699463 +time 1.32 +3 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 7380.71875 +err_fin 4846.990234375 +sparsity check 0.29999983310699463 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.29522277414798737 0.09522275626659393 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1755946479997674 0.9709505944546686 1.0 +err_prefin 1127.72119140625 +err_fin 557.7116088867188 +sparsity check 0.29522277414798737 +time 67.83 +3 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 151899.5625 +err_fin 81245.1875 +sparsity check 0.29999999489103046 +time 137.76 +3 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 151964.3125 +err_fin 80370.4921875 +sparsity check 0.29999999489103046 +time 138.06 +3 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3375.807861328125 +err_fin 2967.864990234375 +sparsity check 0.29999999489103046 +time 135.80 +4 self_attn.q_proj +Pruning ... +0.29966770112514496 0.09966768324375153 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1898694190435064 0.9709505944546686 1.0 +err_prefin 116636.96875 +err_fin 62684.6953125 +sparsity check 0.29966770112514496 +time 74.93 +4 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 53774.46875 +err_fin 30973.330078125 +sparsity check 0.29999983310699463 +time 1.34 +4 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 11458.361328125 +err_fin 8175.4697265625 +sparsity check 0.29999983310699463 +time 1.31 +4 self_attn.o_proj +Pruning ... +0.29358045756816864 0.09358043968677521 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1702374010967032 0.9709505944546686 1.0 +err_prefin 1469.5662841796875 +err_fin 767.6857299804688 +sparsity check 0.29358045756816864 +time 67.85 +4 mlp.gate_proj +Pruning ... +0.2999720743724278 0.1999022513628006 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059687168141702 0.9709505944546686 1.0 +err_prefin 241601.171875 +err_fin 143953.28125 +sparsity check 0.2999720743724278 +time 137.78 +4 mlp.up_proj +Pruning ... +0.2999230538095747 0.19973067939281464 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0058705945342121 0.9709505944546686 1.0 +err_prefin 236393.453125 +err_fin 139779.84375 +sparsity check 0.2999230538095747 +time 138.19 +4 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5088.666015625 +err_fin 4538.8662109375 +sparsity check 0.29999999489103046 +time 135.71 +5 self_attn.q_proj +Pruning ... +0.2999517172574997 0.09995169937610626 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1907705961829542 0.9709505944546686 1.0 +err_prefin 182847.59375 +err_fin 107919.828125 +sparsity check 0.2999517172574997 +time 74.95 +5 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 78792.828125 +err_fin 51433.125 +sparsity check 0.29999983310699463 +time 1.33 +5 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 17187.236328125 +err_fin 12823.0703125 +sparsity check 0.29999983310699463 +time 1.31 +5 self_attn.o_proj +Pruning ... +0.2964719831943512 0.09647196531295776 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1796392943690934 0.9709505944546686 1.0 +err_prefin 2450.074462890625 +err_fin 1195.0565185546875 +sparsity check 0.2964719831943512 +time 67.84 +5 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 298847.28125 +err_fin 187423.8125 +sparsity check 0.29999999489103046 +time 137.77 +5 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 291472.53125 +err_fin 181482.3125 +sparsity check 0.29999999489103046 +time 138.06 +5 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7176.54296875 +err_fin 6329.83203125 +sparsity check 0.29999999489103046 +time 135.79 +6 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 206867.84375 +err_fin 128255.5703125 +sparsity check 0.29998789727687836 +time 74.92 +6 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 83216.7734375 +err_fin 57516.95703125 +sparsity check 0.29999983310699463 +time 1.34 +6 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 19365.34765625 +err_fin 15202.09375 +sparsity check 0.29999983310699463 +time 1.33 +6 self_attn.o_proj +Pruning ... +0.2905711680650711 0.09057115018367767 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1603015048610001 0.9709505944546686 1.0 +err_prefin 4286.0869140625 +err_fin 2491.453125 +sparsity check 0.2905711680650711 +time 67.86 +6 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 395216.5 +err_fin 255924.375 +sparsity check 0.29999999489103046 +time 137.76 +6 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 382376.84375 +err_fin 246421.4375 +sparsity check 0.29999999489103046 +time 138.20 +6 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10638.2509765625 +err_fin 9528.076171875 +sparsity check 0.29999999489103046 +time 135.73 +7 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 341452.1875 +err_fin 224390.28125 +sparsity check 0.29998789727687836 +time 74.94 +7 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 114694.46875 +err_fin 84191.5 +sparsity check 0.29999983310699463 +time 1.32 +7 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 27540.373046875 +err_fin 22415.71484375 +sparsity check 0.29999983310699463 +time 1.31 +7 self_attn.o_proj +Pruning ... +0.2873151898384094 0.08731517195701599 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1493722758084872 0.9709505944546686 1.0 +err_prefin 5619.8203125 +err_fin 3063.083251953125 +sparsity check 0.2873151898384094 +time 67.88 +7 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 486262.3125 +err_fin 324045.4375 +sparsity check 0.29999999489103046 +time 137.77 +7 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 468986.375 +err_fin 311134.46875 +sparsity check 0.29999999489103046 +time 138.03 +7 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 15176.83203125 +err_fin 13571.203125 +sparsity check 0.29999999489103046 +time 135.77 +8 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 300449.0 +err_fin 200402.9375 +sparsity check 0.29998789727687836 +time 74.93 +8 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 113408.46875 +err_fin 79877.375 +sparsity check 0.29999983310699463 +time 1.33 +8 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 25838.47265625 +err_fin 21134.3828125 +sparsity check 0.29999983310699463 +time 1.32 +8 self_attn.o_proj +Pruning ... +0.29325757920742035 0.09325756132602692 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1691787963723583 0.9709505944546686 1.0 +err_prefin 9909.5068359375 +err_fin 5343.287109375 +sparsity check 0.29325757920742035 +time 67.88 +8 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 664751.875 +err_fin 459721.625 +sparsity check 0.29999999489103046 +time 137.75 +8 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 628046.375 +err_fin 432942.5625 +sparsity check 0.29999999489103046 +time 138.19 +8 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 28026.904296875 +err_fin 25225.08203125 +sparsity check 0.29999999489103046 +time 135.61 +9 self_attn.q_proj +Pruning ... +0.29997682571411133 0.0999768078327179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908502024971612 0.9709505944546686 1.0 +err_prefin 321847.90625 +err_fin 231912.484375 +sparsity check 0.29997682571411133 +time 74.95 +9 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 106476.0 +err_fin 83721.1953125 +sparsity check 0.29999983310699463 +time 1.34 +9 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 37827.9921875 +err_fin 32805.93359375 +sparsity check 0.29999983310699463 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.28864505887031555 0.08864504098892212 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1538590273766385 0.9709505944546686 1.0 +err_prefin 5372.61865234375 +err_fin 2617.85302734375 +sparsity check 0.28864505887031555 +time 67.85 +9 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 839131.25 +err_fin 594295.25 +sparsity check 0.29999999489103046 +time 137.71 +9 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 788290.375 +err_fin 556090.75 +sparsity check 0.29999999489103046 +time 138.07 +9 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 26872.58203125 +err_fin 24459.40625 +sparsity check 0.29999999489103046 +time 135.75 +10 self_attn.q_proj +Pruning ... +0.29997682571411133 0.0999768078327179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908502024971612 0.9709505944546686 1.0 +err_prefin 196492.09375 +err_fin 141329.734375 +sparsity check 0.29997682571411133 +time 74.94 +10 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 73369.4375 +err_fin 54307.40234375 +sparsity check 0.29999983310699463 +time 1.34 +10 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 37013.75 +err_fin 31610.794921875 +sparsity check 0.29999983310699463 +time 1.31 +10 self_attn.o_proj +Pruning ... +0.2763001024723053 0.07630008459091187 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1109343374362552 0.9709505944546686 1.0 +err_prefin 3928.4443359375 +err_fin 2009.77587890625 +sparsity check 0.2763001024723053 +time 67.87 +10 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1003815.6875 +err_fin 740524.0 +sparsity check 0.29999999489103046 +time 137.74 +10 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 939789.625 +err_fin 691014.8125 +sparsity check 0.29999999489103046 +time 138.18 +10 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 29326.943359375 +err_fin 26999.7421875 +sparsity check 0.29999999489103046 +time 135.68 +11 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 316608.9375 +err_fin 226435.78125 +sparsity check 0.29998879134655 +time 74.92 +11 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 122440.640625 +err_fin 89522.34375 +sparsity check 0.29999983310699463 +time 1.34 +11 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 44243.2734375 +err_fin 37031.53515625 +sparsity check 0.29999983310699463 +time 1.31 +11 self_attn.o_proj +Pruning ... +0.2790084034204483 0.07900838553905487 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1206036320433004 0.9709505944546686 1.0 +err_prefin 6518.07275390625 +err_fin 3281.354248046875 +sparsity check 0.2790084034204483 +time 67.88 +11 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1071501.5 +err_fin 798371.5 +sparsity check 0.29999999489103046 +time 137.76 +11 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1009022.8125 +err_fin 749514.125 +sparsity check 0.29999999489103046 +time 138.03 +11 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 32609.2109375 +err_fin 30018.953125 +sparsity check 0.29999999489103046 +time 135.73 +12 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 261794.4375 +err_fin 187452.65625 +sparsity check 0.29998879134655 +time 74.92 +12 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 109502.671875 +err_fin 80291.7421875 +sparsity check 0.29999983310699463 +time 1.33 +12 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 33745.0625 +err_fin 27522.99609375 +sparsity check 0.29999983310699463 +time 1.31 +12 self_attn.o_proj +Pruning ... +0.2816329002380371 0.08163288235664368 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1298349114545376 0.9709505944546686 1.0 +err_prefin 7192.89404296875 +err_fin 3595.00830078125 +sparsity check 0.2816329002380371 +time 67.88 +12 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1063842.5 +err_fin 794042.6875 +sparsity check 0.29999999489103046 +time 137.76 +12 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1013963.625 +err_fin 754538.5 +sparsity check 0.29999999489103046 +time 138.17 +12 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 36493.3984375 +err_fin 33421.99609375 +sparsity check 0.29999999489103046 +time 135.71 +13 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 352666.5 +err_fin 255618.4375 +sparsity check 0.29998879134655 +time 74.92 +13 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 123216.078125 +err_fin 92612.046875 +sparsity check 0.29999983310699463 +time 1.34 +13 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 44567.5234375 +err_fin 37710.37109375 +sparsity check 0.29999983310699463 +time 1.31 +13 self_attn.o_proj +Pruning ... +0.2752116918563843 0.07521167397499084 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1070064196849327 0.9709505944546686 1.0 +err_prefin 18506.728515625 +err_fin 10081.5458984375 +sparsity check 0.2752116918563843 +time 67.85 +13 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1141047.5 +err_fin 837738.125 +sparsity check 0.29999999489103046 +time 137.78 +13 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1097610.125 +err_fin 803344.0 +sparsity check 0.29999999489103046 +time 138.05 +13 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 48132.16015625 +err_fin 44068.17578125 +sparsity check 0.29999999489103046 +time 135.78 +14 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 577450.4375 +err_fin 423265.3125 +sparsity check 0.29998879134655 +time 74.95 +14 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 184470.28125 +err_fin 141238.21875 +sparsity check 0.29999983310699463 +time 1.34 +14 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 63341.81640625 +err_fin 54127.0234375 +sparsity check 0.29999983310699463 +time 1.31 +14 self_attn.o_proj +Pruning ... +0.2884829193353653 0.08848290145397186 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.153313695407239 0.9709505944546686 1.0 +err_prefin 23199.708984375 +err_fin 12293.0634765625 +sparsity check 0.2884829193353653 +time 67.84 +14 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1312165.5 +err_fin 990560.625 +sparsity check 0.29999999489103046 +time 137.75 +14 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1261362.0 +err_fin 949626.125 +sparsity check 0.29999999489103046 +time 138.17 +14 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 50792.34765625 +err_fin 46890.79296875 +sparsity check 0.29999999489103046 +time 135.65 +15 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 424490.4375 +err_fin 318170.1875 +sparsity check 0.29998879134655 +time 74.94 +15 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 135404.0625 +err_fin 105356.4765625 +sparsity check 0.29999983310699463 +time 1.34 +15 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 73779.875 +err_fin 64663.421875 +sparsity check 0.29999983310699463 +time 1.31 +15 self_attn.o_proj +Pruning ... +0.2687942236661911 0.06879420578479767 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.08333579718327 0.9709505944546686 1.0 +err_prefin 19743.36328125 +err_fin 11142.53125 +sparsity check 0.2687942236661911 +time 67.89 +15 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1412708.625 +err_fin 1071747.0 +sparsity check 0.29999999489103046 +time 137.77 +15 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1364112.125 +err_fin 1031497.375 +sparsity check 0.29999999489103046 +time 138.03 +15 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 58260.37890625 +err_fin 53685.796875 +sparsity check 0.29999999489103046 +time 135.68 +16 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 410965.8125 +err_fin 311465.84375 +sparsity check 0.29998879134655 +time 74.94 +16 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 150358.703125 +err_fin 116443.578125 +sparsity check 0.29999983310699463 +time 1.32 +16 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 68976.875 +err_fin 60407.609375 +sparsity check 0.29999983310699463 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.2813430279493332 0.08134301006793976 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1288219119178806 0.9709505944546686 1.0 +err_prefin 17627.57421875 +err_fin 9628.939453125 +sparsity check 0.2813430279493332 +time 67.87 +16 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1482370.375 +err_fin 1138280.75 +sparsity check 0.29999999489103046 +time 137.75 +16 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1436231.375 +err_fin 1100404.25 +sparsity check 0.29999999489103046 +time 138.16 +16 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 58589.7890625 +err_fin 54233.8203125 +sparsity check 0.29999999489103046 +time 135.72 +17 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 526197.0 +err_fin 392983.5 +sparsity check 0.29998879134655 +time 74.91 +17 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 180334.875 +err_fin 137717.9375 +sparsity check 0.29999983310699463 +time 1.31 +17 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 62122.34765625 +err_fin 52657.75 +sparsity check 0.29999983310699463 +time 1.31 +17 self_attn.o_proj +Pruning ... +0.29516535997390747 0.09516534209251404 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1754081285736342 0.9709505944546686 1.0 +err_prefin 27624.837890625 +err_fin 13451.22265625 +sparsity check 0.29516535997390747 +time 67.87 +17 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1475431.25 +err_fin 1111972.75 +sparsity check 0.29999999489103046 +time 137.77 +17 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1433489.5 +err_fin 1076994.5 +sparsity check 0.29999999489103046 +time 137.99 +17 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 74916.5 +err_fin 67816.53125 +sparsity check 0.29999999489103046 +time 135.75 +18 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 502272.0 +err_fin 378369.78125 +sparsity check 0.29998879134655 +time 74.92 +18 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 164821.15625 +err_fin 128557.6875 +sparsity check 0.29999983310699463 +time 1.32 +18 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 91514.578125 +err_fin 81140.8828125 +sparsity check 0.29999983310699463 +time 1.31 +18 self_attn.o_proj +Pruning ... +0.2797379046678543 0.07973788678646088 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1231830746767248 0.9709505944546686 1.0 +err_prefin 28444.716796875 +err_fin 14717.9296875 +sparsity check 0.2797379046678543 +time 67.88 +18 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1614599.25 +err_fin 1230537.0 +sparsity check 0.29999999489103046 +time 137.76 +18 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1558061.25 +err_fin 1183164.75 +sparsity check 0.29999999489103046 +time 138.18 +18 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 78702.421875 +err_fin 71932.4375 +sparsity check 0.29999999489103046 +time 135.78 +19 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 576192.0 +err_fin 444806.375 +sparsity check 0.29998879134655 +time 74.91 +19 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 175761.90625 +err_fin 142242.375 +sparsity check 0.29999983310699463 +time 1.33 +19 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 106367.53125 +err_fin 96257.84375 +sparsity check 0.29999983310699463 +time 1.31 +19 self_attn.o_proj +Pruning ... +0.26657745242118835 0.06657743453979492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0749476349473879 0.9709505944546686 1.0 +err_prefin 22884.625 +err_fin 12479.001953125 +sparsity check 0.26657745242118835 +time 67.94 +19 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1743903.5 +err_fin 1336926.75 +sparsity check 0.29999999489103046 +time 137.75 +19 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1683411.375 +err_fin 1287939.625 +sparsity check 0.29999999489103046 +time 138.03 +19 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 80935.0 +err_fin 74760.453125 +sparsity check 0.29999999489103046 +time 135.74 +20 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 510591.09375 +err_fin 396992.5 +sparsity check 0.29998879134655 +time 74.94 +20 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 158409.21875 +err_fin 126875.9765625 +sparsity check 0.29999983310699463 +time 1.32 +20 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 86279.90625 +err_fin 78110.109375 +sparsity check 0.29999983310699463 +time 1.30 +20 self_attn.o_proj +Pruning ... +0.2718873471021652 0.07188732922077179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0948557784689537 0.9709505944546686 1.0 +err_prefin 19584.56640625 +err_fin 9591.591796875 +sparsity check 0.2718873471021652 +time 67.88 +20 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1852608.875 +err_fin 1424023.625 +sparsity check 0.29999288490840365 +time 137.77 +20 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1789557.75 +err_fin 1372617.5 +sparsity check 0.29999288490840365 +time 138.16 +20 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 81356.515625 +err_fin 75384.609375 +sparsity check 0.29999999489103046 +time 135.72 +21 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 327392.375 +err_fin 251870.3125 +sparsity check 0.29998879134655 +time 74.97 +21 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 113291.296875 +err_fin 87867.2421875 +sparsity check 0.29999983310699463 +time 1.34 +21 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 75309.59375 +err_fin 66831.5078125 +sparsity check 0.29999983310699463 +time 1.32 +21 self_attn.o_proj +Pruning ... +0.26312975585460663 0.0631297379732132 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0616736476882211 0.9709505944546686 1.0 +err_prefin 24898.46484375 +err_fin 13261.9765625 +sparsity check 0.26312975585460663 +time 67.88 +21 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1920540.25 +err_fin 1477476.5 +sparsity check 0.29999288490840365 +time 137.77 +21 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1864768.75 +err_fin 1432040.25 +sparsity check 0.29999999489103046 +time 138.08 +21 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 84583.421875 +err_fin 78796.125 +sparsity check 0.29999999489103046 +time 135.67 +22 self_attn.q_proj +Pruning ... +0.29997682571411133 0.0999768078327179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908502024971612 0.9709505944546686 1.0 +err_prefin 303901.75 +err_fin 236533.03125 +sparsity check 0.29997682571411133 +time 74.94 +22 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 120275.90625 +err_fin 97766.234375 +sparsity check 0.29999983310699463 +time 1.35 +22 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 83501.3203125 +err_fin 75249.4765625 +sparsity check 0.29999983310699463 +time 1.31 +22 self_attn.o_proj +Pruning ... +0.27621839940547943 0.076218381524086 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.110640328608706 0.9709505944546686 1.0 +err_prefin 16023.5283203125 +err_fin 8324.244140625 +sparsity check 0.27621839940547943 +time 67.91 +22 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2001608.125 +err_fin 1537090.25 +sparsity check 0.29999288490840365 +time 137.77 +22 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1948519.75 +err_fin 1492786.75 +sparsity check 0.29999288490840365 +time 138.18 +22 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 86168.171875 +err_fin 80430.5234375 +sparsity check 0.29999999489103046 +time 135.58 +23 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 440584.78125 +err_fin 337327.71875 +sparsity check 0.29998879134655 +time 75.13 +23 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 167621.515625 +err_fin 133256.59375 +sparsity check 0.29999983310699463 +time 1.33 +23 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 89806.90625 +err_fin 80290.171875 +sparsity check 0.29999983310699463 +time 1.31 +23 self_attn.o_proj +Pruning ... +0.2784363329410553 0.07843631505966187 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1185734717779658 0.9709505944546686 1.0 +err_prefin 24210.25390625 +err_fin 12262.890625 +sparsity check 0.2784363329410553 +time 68.08 +23 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2035726.5 +err_fin 1565690.5 +sparsity check 0.29999288490840365 +time 138.21 +23 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1986447.5 +err_fin 1523848.25 +sparsity check 0.29999288490840365 +time 138.51 +23 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 88675.1953125 +err_fin 82281.703125 +sparsity check 0.29999999489103046 +time 136.19 +24 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 391728.25 +err_fin 294734.8125 +sparsity check 0.29998879134655 +time 75.07 +24 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 149414.109375 +err_fin 115625.390625 +sparsity check 0.29999983310699463 +time 1.32 +24 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 69658.3125 +err_fin 59851.4921875 +sparsity check 0.29999983310699463 +time 1.31 +24 self_attn.o_proj +Pruning ... +0.2838824987411499 0.08388248085975647 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1376418772496466 0.9709505944546686 1.0 +err_prefin 20319.00390625 +err_fin 11150.955078125 +sparsity check 0.2838824987411499 +time 68.02 +24 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1986341.25 +err_fin 1530140.25 +sparsity check 0.29999999489103046 +time 138.15 +24 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1955876.25 +err_fin 1504181.625 +sparsity check 0.29999999489103046 +time 138.49 +24 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 90355.4375 +err_fin 83795.28125 +sparsity check 0.29999999489103046 +time 135.96 +25 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 463368.28125 +err_fin 349878.0 +sparsity check 0.29998879134655 +time 75.09 +25 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 173160.0625 +err_fin 133917.875 +sparsity check 0.29999983310699463 +time 1.33 +25 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 73108.625 +err_fin 62942.953125 +sparsity check 0.29999983310699463 +time 1.31 +25 self_attn.o_proj +Pruning ... +0.27712246775627136 0.07712244987487793 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1138860353396238 0.9709505944546686 1.0 +err_prefin 51899.3515625 +err_fin 28122.509765625 +sparsity check 0.27712246775627136 +time 68.04 +25 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1935228.25 +err_fin 1446518.875 +sparsity check 0.29999999489103046 +time 138.16 +25 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1924336.25 +err_fin 1433656.25 +sparsity check 0.29999999489103046 +time 138.43 +25 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 130152.5 +err_fin 118920.578125 +sparsity check 0.29999999489103046 +time 136.08 +26 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 668758.875 +err_fin 509217.65625 +sparsity check 0.29998879134655 +time 75.10 +26 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 225106.59375 +err_fin 179373.0625 +sparsity check 0.29999983310699463 +time 1.34 +26 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 113069.515625 +err_fin 97972.1328125 +sparsity check 0.29999983310699463 +time 1.30 +26 self_attn.o_proj +Pruning ... +0.28410089015960693 0.0841008722782135 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1383946818247657 0.9709505944546686 1.0 +err_prefin 61961.640625 +err_fin 33807.609375 +sparsity check 0.28410089015960693 +time 68.04 +26 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2194201.0 +err_fin 1659860.75 +sparsity check 0.29999288490840365 +time 138.14 +26 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2169127.25 +err_fin 1636336.5 +sparsity check 0.29999288490840365 +time 138.55 +26 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 127526.078125 +err_fin 119189.890625 +sparsity check 0.29999999489103046 +time 136.01 +27 self_attn.q_proj +Pruning ... +0.29997682571411133 0.0999768078327179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908502024971612 0.9709505944546686 1.0 +err_prefin 308544.71875 +err_fin 238041.40625 +sparsity check 0.29997682571411133 +time 75.09 +27 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 127479.53125 +err_fin 101545.5078125 +sparsity check 0.29999983310699463 +time 1.34 +27 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 91094.3125 +err_fin 82046.3125 +sparsity check 0.29999983310699463 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.28035055100917816 0.08035053312778473 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1253412433754744 0.9709505944546686 1.0 +err_prefin 25478.0859375 +err_fin 12188.7265625 +sparsity check 0.28035055100917816 +time 68.03 +27 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2319278.0 +err_fin 1761144.875 +sparsity check 0.29999288490840365 +time 138.14 +27 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2286411.0 +err_fin 1731594.875 +sparsity check 0.29999288490840365 +time 138.43 +27 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 134304.78125 +err_fin 125860.28125 +sparsity check 0.29999999489103046 +time 136.03 +28 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 556808.125 +err_fin 429353.5625 +sparsity check 0.29998879134655 +time 75.08 +28 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 198106.21875 +err_fin 163720.28125 +sparsity check 0.29999983310699463 +time 1.35 +28 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 141281.0625 +err_fin 126776.921875 +sparsity check 0.29999983310699463 +time 1.32 +28 self_attn.o_proj +Pruning ... +0.27586427330970764 0.07586425542831421 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1093644214291734 0.9709505944546686 1.0 +err_prefin 44421.6015625 +err_fin 24812.359375 +sparsity check 0.27586427330970764 +time 68.04 +28 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2449181.25 +err_fin 1867147.25 +sparsity check 0.29999288490840365 +time 138.11 +28 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2413979.75 +err_fin 1834742.75 +sparsity check 0.29999288490840365 +time 138.53 +28 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 143911.40625 +err_fin 134980.046875 +sparsity check 0.29999999489103046 +time 136.01 +29 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 556052.3125 +err_fin 429065.28125 +sparsity check 0.29998879134655 +time 75.08 +29 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 190485.8125 +err_fin 154461.734375 +sparsity check 0.29999983310699463 +time 1.36 +29 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 150277.90625 +err_fin 134620.03125 +sparsity check 0.29999983310699463 +time 1.33 +29 self_attn.o_proj +Pruning ... +0.2676489055156708 0.06764888763427734 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0790159956155794 0.9709505944546686 1.0 +err_prefin 41584.5546875 +err_fin 25433.015625 +sparsity check 0.2676489055156708 +time 68.04 +29 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2596596.75 +err_fin 1990537.25 +sparsity check 0.29999288490840365 +time 138.13 +29 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2551580.75 +err_fin 1950915.0 +sparsity check 0.29999288490840365 +time 138.39 +29 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 151312.84375 +err_fin 142841.296875 +sparsity check 0.29999999489103046 +time 136.02 +30 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 284128.6875 +err_fin 221573.90625 +sparsity check 0.29998879134655 +time 75.09 +30 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 97484.125 +err_fin 79525.3671875 +sparsity check 0.29999983310699463 +time 1.34 +30 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 133623.0625 +err_fin 122049.625 +sparsity check 0.29999983310699463 +time 1.32 +30 self_attn.o_proj +Pruning ... +0.2699955552816391 0.06999553740024567 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0878351277825802 0.9709505944546686 1.0 +err_prefin 19892.140625 +err_fin 11753.15625 +sparsity check 0.2699955552816391 +time 68.05 +30 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2744453.0 +err_fin 2119522.25 +sparsity check 0.29999288490840365 +time 138.11 +30 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2685503.0 +err_fin 2068581.5 +sparsity check 0.29999288490840365 +time 138.54 +30 mlp.down_proj +Pruning ... +0.2999999906335558 0.19999997317790985 0.242857141154153 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245631764015 0.9709505944546686 1.0 +err_prefin 155463.21875 +err_fin 146958.765625 +sparsity check 0.2999999906335558 +time 136.01 +31 self_attn.q_proj +Pruning ... +0.2999767065048218 0.09997668862342834 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908498245682169 0.9709505944546686 1.0 +err_prefin 586435.4375 +err_fin 456473.5 +sparsity check 0.2999767065048218 +time 75.10 +31 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 190119.671875 +err_fin 157454.9375 +sparsity check 0.29999983310699463 +time 1.33 +31 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 151328.828125 +err_fin 133361.0625 +sparsity check 0.29999983310699463 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.26509250700473785 0.06509248912334442 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.069265072491063 0.9709505944546686 1.0 +err_prefin 47542.30859375 +err_fin 28039.8203125 +sparsity check 0.26509250700473785 +time 68.04 +31 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 2856960.5 +err_fin 2226228.75 +sparsity check 0.29998569403375897 +time 138.13 +31 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 2789057.25 +err_fin 2168888.5 +sparsity check 0.29998569403375897 +time 138.42 +31 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 156633.078125 +err_fin 148351.5 +sparsity check 0.29999999489103046 +time 136.09 +32 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 645674.375 +err_fin 505763.3125 +sparsity check 0.29998879134655 +time 75.09 +32 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 200602.46875 +err_fin 168077.53125 +sparsity check 0.29999983310699463 +time 1.32 +32 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 132714.8125 +err_fin 120198.78125 +sparsity check 0.29999983310699463 +time 1.33 +32 self_attn.o_proj +Pruning ... +0.2810487300157547 0.08104871213436127 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1277917867336793 0.9709505944546686 1.0 +err_prefin 31878.662109375 +err_fin 17792.8984375 +sparsity check 0.2810487300157547 +time 68.04 +32 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2937329.5 +err_fin 2285798.75 +sparsity check 0.29999999489103046 +time 138.12 +32 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2874004.5 +err_fin 2231448.0 +sparsity check 0.29999999489103046 +time 138.53 +32 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 167017.1875 +err_fin 157694.75 +sparsity check 0.29999999489103046 +time 136.01 +33 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 921647.1875 +err_fin 715591.625 +sparsity check 0.29998879134655 +time 75.09 +33 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 260694.234375 +err_fin 211985.234375 +sparsity check 0.29999983310699463 +time 1.34 +33 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 152820.75 +err_fin 135232.0625 +sparsity check 0.29999983310699463 +time 1.33 +33 self_attn.o_proj +Pruning ... +0.27306830883026123 0.0730682909488678 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0991991034389423 0.9709505944546686 1.0 +err_prefin 76522.5625 +err_fin 41888.80078125 +sparsity check 0.27306830883026123 +time 68.02 +33 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2978824.0 +err_fin 2269977.0 +sparsity check 0.29999999489103046 +time 138.14 +33 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 2927499.0 +err_fin 2225089.0 +sparsity check 0.29998569403375897 +time 138.38 +33 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 193353.75 +err_fin 181045.84375 +sparsity check 0.29999999489103046 +time 136.04 +34 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 344702.53125 +err_fin 260525.96875 +sparsity check 0.29998879134655 +time 75.08 +34 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 135257.96875 +err_fin 103439.6875 +sparsity check 0.29999983310699463 +time 1.33 +34 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 92529.46875 +err_fin 79457.9375 +sparsity check 0.29999983310699463 +time 1.33 +34 self_attn.o_proj +Pruning ... +0.2805570214986801 0.08055700361728668 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1260669232643261 0.9709505944546686 1.0 +err_prefin 44281.296875 +err_fin 23647.220703125 +sparsity check 0.2805570214986801 +time 68.05 +34 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3085606.5 +err_fin 2370264.5 +sparsity check 0.29999999489103046 +time 138.09 +34 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3029326.0 +err_fin 2321283.5 +sparsity check 0.29999999489103046 +time 138.51 +34 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 210513.625 +err_fin 198454.890625 +sparsity check 0.29999999489103046 +time 136.04 +35 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 556998.375 +err_fin 429104.375 +sparsity check 0.29998879134655 +time 75.08 +35 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 172997.640625 +err_fin 135932.15625 +sparsity check 0.29999983310699463 +time 1.32 +35 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 124336.2734375 +err_fin 109908.96875 +sparsity check 0.29999983310699463 +time 1.32 +35 self_attn.o_proj +Pruning ... +0.26645298302173615 0.06645296514034271 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0744732981503389 0.9709505944546686 1.0 +err_prefin 56546.22265625 +err_fin 28763.7421875 +sparsity check 0.26645298302173615 +time 68.06 +35 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3277307.0 +err_fin 2514377.0 +sparsity check 0.29998569403375897 +time 138.10 +35 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3213722.0 +err_fin 2459371.5 +sparsity check 0.29998569403375897 +time 138.40 +35 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 225026.625 +err_fin 212251.28125 +sparsity check 0.29999999489103046 +time 136.07 +36 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 535441.875 +err_fin 411742.625 +sparsity check 0.29998879134655 +time 75.07 +36 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 167991.1875 +err_fin 133753.921875 +sparsity check 0.29999983310699463 +time 1.35 +36 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 118896.40625 +err_fin 105938.703125 +sparsity check 0.29999983310699463 +time 1.32 +36 self_attn.o_proj +Pruning ... +0.27251090109348297 0.07251088321208954 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0971528059664624 0.9709505944546686 1.0 +err_prefin 40434.15234375 +err_fin 20990.693359375 +sparsity check 0.27251090109348297 +time 68.04 +36 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3363074.75 +err_fin 2574906.75 +sparsity check 0.29998569403375897 +time 138.12 +36 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3297823.5 +err_fin 2520323.5 +sparsity check 0.29998569403375897 +time 138.51 +36 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 251172.375 +err_fin 236097.359375 +sparsity check 0.29999999489103046 +time 136.04 +37 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 850409.75 +err_fin 653630.625 +sparsity check 0.29998879134655 +time 75.11 +37 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 238505.765625 +err_fin 191663.96875 +sparsity check 0.29999983310699463 +time 1.33 +37 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 154456.4375 +err_fin 137949.3125 +sparsity check 0.29999983310699463 +time 1.31 +37 self_attn.o_proj +Pruning ... +0.27937693893909454 0.07937692105770111 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1219080495563976 0.9709505944546686 1.0 +err_prefin 73303.3359375 +err_fin 35917.9765625 +sparsity check 0.27937693893909454 +time 68.02 +37 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3538460.0 +err_fin 2698741.0 +sparsity check 0.29998569403375897 +time 138.13 +37 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3471317.0 +err_fin 2641933.0 +sparsity check 0.29998569403375897 +time 138.40 +37 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 276686.9375 +err_fin 259926.734375 +sparsity check 0.29999999489103046 +time 136.05 +38 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 954035.625 +err_fin 723900.5 +sparsity check 0.29998879134655 +time 75.10 +38 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 264393.375 +err_fin 206396.0 +sparsity check 0.29999983310699463 +time 1.34 +38 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 168857.5 +err_fin 146324.8125 +sparsity check 0.29999983310699463 +time 1.30 +38 self_attn.o_proj +Pruning ... +0.28340384364128113 0.0834038257598877 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1359887977706427 0.9709505944546686 1.0 +err_prefin 137350.9375 +err_fin 67391.84375 +sparsity check 0.28340384364128113 +time 68.01 +38 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 3666757.5 +err_fin 2786624.0 +sparsity check 0.2999928040163858 +time 138.11 +38 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 3605587.5 +err_fin 2732781.5 +sparsity check 0.2999928040163858 +time 138.52 +38 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 314325.625 +err_fin 294774.9375 +sparsity check 0.29999999489103046 +time 136.01 +39 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 1158776.625 +err_fin 881585.875 +sparsity check 0.29998879134655 +time 75.09 +39 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 324029.9375 +err_fin 263872.8125 +sparsity check 0.29999983310699463 +time 1.33 +39 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 184303.484375 +err_fin 160006.0625 +sparsity check 0.29999983310699463 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.289878711104393 0.08987869322299957 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1579929118461283 0.9709505944546686 1.0 +err_prefin 147869.84375 +err_fin 72714.1953125 +sparsity check 0.289878711104393 +time 68.02 +39 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3731458.0 +err_fin 2807082.0 +sparsity check 0.29999999489103046 +time 138.11 +39 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3675048.5 +err_fin 2759898.25 +sparsity check 0.29999999489103046 +time 138.39 +39 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 381719.96875 +err_fin 353688.625 +sparsity check 0.29999999489103046 +time 136.06 +40 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 1080544.125 +err_fin 814920.75 +sparsity check 0.29999998211860657 +time 74.94 +40 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 283680.875 +err_fin 229947.625 +sparsity check 0.29999983310699463 +time 1.33 +40 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 210013.09375 +err_fin 180032.53125 +sparsity check 0.29999983310699463 +time 1.31 +40 self_attn.o_proj +Pruning ... +0.28795187175273895 0.08795185387134552 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1515243001537314 0.9709505944546686 1.0 +err_prefin 196848.125 +err_fin 111018.8984375 +sparsity check 0.28795187175273895 +time 67.94 +40 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 3907574.0 +err_fin 2907535.5 +sparsity check 0.2999928040163858 +time 137.83 +40 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 3837530.0 +err_fin 2850688.0 +sparsity check 0.2999928040163858 +time 138.23 +40 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 432315.1875 +err_fin 402098.21875 +sparsity check 0.29999999489103046 +time 135.73 +41 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 907988.75 +err_fin 672588.875 +sparsity check 0.29999998211860657 +time 74.94 +41 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 247021.3125 +err_fin 195653.671875 +sparsity check 0.29999983310699463 +time 1.35 +41 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 182044.78125 +err_fin 156797.625 +sparsity check 0.29999983310699463 +time 1.31 +41 self_attn.o_proj +Pruning ... +0.28459352254867554 0.0845935046672821 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1400895297749696 0.9709505944546686 1.0 +err_prefin 198913.25 +err_fin 98080.21875 +sparsity check 0.28459352254867554 +time 67.90 +41 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 4106484.5 +err_fin 3000634.5 +sparsity check 0.29998569403375897 +time 137.83 +41 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3958873.0 +err_fin 2883165.0 +sparsity check 0.29998569403375897 +time 138.12 +41 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 497553.34375 +err_fin 460157.5625 +sparsity check 0.29999999489103046 +time 135.83 +42 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 1032662.0625 +err_fin 763616.0 +sparsity check 0.29999998211860657 +time 74.92 +42 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 254862.5 +err_fin 202071.84375 +sparsity check 0.29999983310699463 +time 1.32 +42 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 222320.03125 +err_fin 193921.90625 +sparsity check 0.29999983310699463 +time 1.33 +42 self_attn.o_proj +Pruning ... +0.2862556427717209 0.08625562489032745 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1457746488308302 0.9709505944546686 1.0 +err_prefin 187395.953125 +err_fin 97517.015625 +sparsity check 0.2862556427717209 +time 67.89 +42 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 4441402.0 +err_fin 3230147.5 +sparsity check 0.2999928040163858 +time 137.85 +42 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 4174835.5 +err_fin 3027989.0 +sparsity check 0.2999928040163858 +time 138.24 +42 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 527780.8125 +err_fin 488658.875 +sparsity check 0.29999999489103046 +time 135.76 +43 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 718667.75 +err_fin 530635.625 +sparsity check 0.29999998211860657 +time 74.98 +43 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 224979.0 +err_fin 175419.859375 +sparsity check 0.29999983310699463 +time 1.33 +43 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 181082.515625 +err_fin 152621.78125 +sparsity check 0.29999983310699463 +time 1.33 +43 self_attn.o_proj +Pruning ... +0.2754656672477722 0.07546564936637878 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1079251663770824 0.9709505944546686 1.0 +err_prefin 137791.5625 +err_fin 76316.640625 +sparsity check 0.2754656672477722 +time 67.88 +43 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 4625762.0 +err_fin 3350013.25 +sparsity check 0.29999999489103046 +time 137.81 +43 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 4295103.0 +err_fin 3105253.75 +sparsity check 0.29999999489103046 +time 138.11 +43 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 566437.5 +err_fin 524485.25 +sparsity check 0.29999999489103046 +time 135.82 +44 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 1223547.5 +err_fin 913486.75 +sparsity check 0.29999998211860657 +time 74.92 +44 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 249873.28125 +err_fin 202698.40625 +sparsity check 0.29999983310699463 +time 1.33 +44 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 269607.875 +err_fin 237971.28125 +sparsity check 0.29999983310699463 +time 1.31 +44 self_attn.o_proj +Pruning ... +0.2958831340074539 0.09588311612606049 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1777359749883376 0.9709505944546686 1.0 +err_prefin 359362.0 +err_fin 208121.984375 +sparsity check 0.2958831340074539 +time 67.87 +44 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 5070381.0 +err_fin 3646746.5 +sparsity check 0.2999928040163858 +time 137.76 +44 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 4542354.5 +err_fin 3257371.0 +sparsity check 0.2999928040163858 +time 138.19 +44 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 639691.75 +err_fin 585614.625 +sparsity check 0.29999999489103046 +time 135.71 +45 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 804759.8125 +err_fin 592195.125 +sparsity check 0.29998879134655 +time 74.91 +45 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 232416.375 +err_fin 185826.71875 +sparsity check 0.29999983310699463 +time 1.33 +45 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 245836.375 +err_fin 219475.59375 +sparsity check 0.29999983310699463 +time 1.32 +45 self_attn.o_proj +Pruning ... +0.28903020918369293 0.0890301913022995 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1551525418871482 0.9709505944546686 1.0 +err_prefin 91797.03125 +err_fin 53978.2421875 +sparsity check 0.28903020918369293 +time 67.85 +45 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 5468754.5 +err_fin 3953727.5 +sparsity check 0.2999928040163858 +time 137.84 +45 mlp.up_proj +Pruning ... +0.2999858260154724 0.19995038211345673 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059962292380278 0.9709505944546686 1.0 +err_prefin 4828201.0 +err_fin 3482285.25 +sparsity check 0.2999858260154724 +time 138.10 +45 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 641580.75 +err_fin 590428.5 +sparsity check 0.29999999489103046 +time 135.82 +46 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 664021.5 +err_fin 493468.5 +sparsity check 0.29998879134655 +time 74.93 +46 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 183038.40625 +err_fin 150695.421875 +sparsity check 0.29999983310699463 +time 1.33 +46 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 224350.125 +err_fin 199914.984375 +sparsity check 0.29999983310699463 +time 1.33 +46 self_attn.o_proj +Pruning ... +0.27695709466934204 0.07695707678794861 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1132935707778988 0.9709505944546686 1.0 +err_prefin 146060.875 +err_fin 87849.25 +sparsity check 0.27695709466934204 +time 67.90 +46 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5835415.5 +err_fin 4269036.5 +sparsity check 0.29999999489103046 +time 137.83 +46 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 5071733.5 +err_fin 3702408.5 +sparsity check 0.2999928040163858 +time 138.26 +46 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 639784.0 +err_fin 593456.375 +sparsity check 0.29999999489103046 +time 135.66 +47 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 919380.0 +err_fin 682234.1875 +sparsity check 0.29999998211860657 +time 74.93 +47 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 263575.78125 +err_fin 212870.53125 +sparsity check 0.29999983310699463 +time 1.33 +47 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 207595.1875 +err_fin 184903.96875 +sparsity check 0.29999983310699463 +time 1.32 +47 self_attn.o_proj +Pruning ... +0.29235348105430603 0.0923534631729126 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1662050953881062 0.9709505944546686 1.0 +err_prefin 219744.5 +err_fin 105917.7890625 +sparsity check 0.29235348105430603 +time 67.89 +47 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6036848.5 +err_fin 4374001.0 +sparsity check 0.29999999489103046 +time 137.82 +47 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5174902.5 +err_fin 3741000.0 +sparsity check 0.29999999489103046 +time 138.02 +47 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 716123.625 +err_fin 657202.25 +sparsity check 0.29999999489103046 +time 135.77 +48 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 465647.5 +err_fin 344852.84375 +sparsity check 0.29998879134655 +time 74.94 +48 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 124884.1875 +err_fin 100681.1484375 +sparsity check 0.29999983310699463 +time 1.33 +48 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 205058.0 +err_fin 184559.46875 +sparsity check 0.29999983310699463 +time 1.31 +48 self_attn.o_proj +Pruning ... +0.276949867606163 0.07694984972476959 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1132676664885035 0.9709505944546686 1.0 +err_prefin 133447.375 +err_fin 79014.34375 +sparsity check 0.276949867606163 +time 67.88 +48 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6292413.0 +err_fin 4523682.5 +sparsity check 0.29999999489103046 +time 137.80 +48 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5364602.5 +err_fin 3850003.5 +sparsity check 0.29999999489103046 +time 138.22 +48 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 709427.125 +err_fin 652958.375 +sparsity check 0.29999999489103046 +time 135.74 +49 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 270561.71875 +err_fin 199012.203125 +sparsity check 0.29998879134655 +time 74.94 +49 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 102232.015625 +err_fin 81923.0 +sparsity check 0.29999983310699463 +time 1.34 +49 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 164399.0625 +err_fin 144228.734375 +sparsity check 0.29999983310699463 +time 1.30 +49 self_attn.o_proj +Pruning ... +0.2970356047153473 0.09703558683395386 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1814556951776876 0.9709505944546686 1.0 +err_prefin 45655.078125 +err_fin 25116.39453125 +sparsity check 0.2970356047153473 +time 67.83 +49 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6509316.0 +err_fin 4686928.0 +sparsity check 0.29999999489103046 +time 137.89 +49 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5540761.5 +err_fin 3985135.25 +sparsity check 0.29999999489103046 +time 138.16 +49 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 713351.625 +err_fin 659329.75 +sparsity check 0.29999999489103046 +time 135.90 +50 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 404233.15625 +err_fin 298731.5625 +sparsity check 0.29998879134655 +time 74.92 +50 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 113135.2421875 +err_fin 91920.625 +sparsity check 0.29999983310699463 +time 1.34 +50 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 211036.875 +err_fin 186140.515625 +sparsity check 0.29999983310699463 +time 1.31 +50 self_attn.o_proj +Pruning ... +0.2647864520549774 0.06478643417358398 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0680874055562972 0.9709505944546686 1.0 +err_prefin 101116.8828125 +err_fin 63962.734375 +sparsity check 0.2647864520549774 +time 67.88 +50 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6731093.5 +err_fin 4858090.5 +sparsity check 0.29999999489103046 +time 137.83 +50 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5718403.0 +err_fin 4119040.25 +sparsity check 0.29999999489103046 +time 138.23 +50 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 717476.125 +err_fin 664461.75 +sparsity check 0.29999999489103046 +time 135.78 +51 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 523168.25 +err_fin 385879.96875 +sparsity check 0.29998879134655 +time 74.94 +51 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 152884.21875 +err_fin 124647.375 +sparsity check 0.29999983310699463 +time 1.31 +51 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 219848.25 +err_fin 195624.703125 +sparsity check 0.29999983310699463 +time 1.32 +51 self_attn.o_proj +Pruning ... +0.26690903306007385 0.06690901517868042 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0762094923420098 0.9709505944546686 1.0 +err_prefin 110817.078125 +err_fin 62859.36328125 +sparsity check 0.26690903306007385 +time 67.88 +51 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6899877.0 +err_fin 5000642.0 +sparsity check 0.29999999489103046 +time 137.77 +51 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5879860.5 +err_fin 4254378.0 +sparsity check 0.29999999489103046 +time 138.03 +51 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 725606.75 +err_fin 673433.75 +sparsity check 0.29999999489103046 +time 135.80 +52 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 841074.1875 +err_fin 626901.625 +sparsity check 0.29998879134655 +time 74.94 +52 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 178144.6875 +err_fin 148127.53125 +sparsity check 0.29999983310699463 +time 1.33 +52 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 261927.0 +err_fin 233192.109375 +sparsity check 0.29999983310699463 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.2869969606399536 0.08699694275856018 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1482938937343352 0.9709505944546686 1.0 +err_prefin 246878.921875 +err_fin 135332.0 +sparsity check 0.2869969606399536 +time 67.87 +52 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7014948.0 +err_fin 5115490.5 +sparsity check 0.29999999489103046 +time 137.76 +52 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5968485.0 +err_fin 4344353.5 +sparsity check 0.29999999489103046 +time 138.18 +52 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 731476.75 +err_fin 680262.75 +sparsity check 0.29999999489103046 +time 135.72 +53 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 281445.96875 +err_fin 210045.75 +sparsity check 0.29998879134655 +time 74.94 +53 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 79969.4296875 +err_fin 65543.421875 +sparsity check 0.29999983310699463 +time 1.32 +53 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 201788.796875 +err_fin 181084.875 +sparsity check 0.29999983310699463 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.27482394874095917 0.07482393085956573 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1056011912180657 0.9709505944546686 1.0 +err_prefin 58068.44921875 +err_fin 35820.52734375 +sparsity check 0.27482394874095917 +time 67.90 +53 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7190984.5 +err_fin 5225492.0 +sparsity check 0.29999999489103046 +time 137.78 +53 mlp.up_proj +Pruning ... +0.2999930168901171 0.19997555017471313 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060106133887283 0.9709505944546686 1.0 +err_prefin 6130041.0 +err_fin 4447478.0 +sparsity check 0.2999930168901171 +time 138.07 +53 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 743852.6875 +err_fin 691739.75 +sparsity check 0.29999999489103046 +time 135.77 +54 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 329570.0625 +err_fin 244857.578125 +sparsity check 0.29998879134655 +time 74.92 +54 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 90746.875 +err_fin 73247.140625 +sparsity check 0.2999997138977051 +time 1.34 +54 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 209753.625 +err_fin 187811.0625 +sparsity check 0.29999983310699463 +time 1.31 +54 self_attn.o_proj +Pruning ... +0.2679605334997177 0.06796051561832428 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0801943159570713 0.9709505944546686 1.0 +err_prefin 84472.953125 +err_fin 50208.2265625 +sparsity check 0.2679605334997177 +time 67.88 +54 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7337865.0 +err_fin 5350986.0 +sparsity check 0.29999999489103046 +time 137.79 +54 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6279685.0 +err_fin 4569357.5 +sparsity check 0.29999999489103046 +time 138.18 +54 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 748287.25 +err_fin 697182.0625 +sparsity check 0.29999999489103046 +time 135.71 +55 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 525178.1875 +err_fin 394429.0625 +sparsity check 0.29998879134655 +time 74.93 +55 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 146428.703125 +err_fin 121194.65625 +sparsity check 0.29999983310699463 +time 1.34 +55 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 271352.25 +err_fin 244146.5625 +sparsity check 0.29999983310699463 +time 1.31 +55 self_attn.o_proj +Pruning ... +0.2537457197904587 0.05374570190906525 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0240283496937537 0.9709505944546686 1.0 +err_prefin 138155.59375 +err_fin 88221.515625 +sparsity check 0.2537457197904587 +time 67.87 +55 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7445884.0 +err_fin 5445626.5 +sparsity check 0.29999999489103046 +time 137.86 +55 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 6423024.0 +err_fin 4690568.5 +sparsity check 0.29999276995658875 +time 138.13 +55 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 762954.875 +err_fin 712664.8125 +sparsity check 0.29999999489103046 +time 135.84 +56 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 383869.125 +err_fin 287241.75 +sparsity check 0.29998879134655 +time 74.92 +56 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 117954.28125 +err_fin 95604.90625 +sparsity check 0.29999983310699463 +time 1.36 +56 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 192813.5625 +err_fin 171541.8125 +sparsity check 0.29999983310699463 +time 1.32 +56 self_attn.o_proj +Pruning ... +0.2789105176925659 0.07891049981117249 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1202567170885263 0.9709505944546686 1.0 +err_prefin 71235.9375 +err_fin 40052.0546875 +sparsity check 0.2789105176925659 +time 67.89 +56 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7519281.0 +err_fin 5496486.5 +sparsity check 0.29999999489103046 +time 137.82 +56 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 6514655.0 +err_fin 4749463.0 +sparsity check 0.29999276995658875 +time 138.22 +56 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 774782.625 +err_fin 723765.0625 +sparsity check 0.29999999489103046 +time 135.74 +57 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 364376.03125 +err_fin 273175.5 +sparsity check 0.29998879134655 +time 74.93 +57 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 105951.109375 +err_fin 88166.7890625 +sparsity check 0.29999983310699463 +time 1.32 +57 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 245037.0625 +err_fin 220778.203125 +sparsity check 0.29999983310699463 +time 1.30 +57 self_attn.o_proj +Pruning ... +0.2683565765619278 0.06835655868053436 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0816886340616965 0.9709505944546686 1.0 +err_prefin 79624.2109375 +err_fin 50351.03515625 +sparsity check 0.2683565765619278 +time 67.86 +57 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7758609.0 +err_fin 5682120.0 +sparsity check 0.29999999489103046 +time 137.79 +57 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6733990.0 +err_fin 4919042.0 +sparsity check 0.29999999489103046 +time 138.09 +57 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 794832.6875 +err_fin 743152.625 +sparsity check 0.29999999489103046 +time 135.81 +58 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 205269.765625 +err_fin 153413.3125 +sparsity check 0.29998789727687836 +time 74.96 +58 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 65268.65625 +err_fin 51892.671875 +sparsity check 0.29999983310699463 +time 1.34 +58 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 176897.375 +err_fin 157272.359375 +sparsity check 0.29999983310699463 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.292335569858551 0.09233555197715759 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1661460413382314 0.9709505944546686 1.0 +err_prefin 52765.92578125 +err_fin 30778.25 +sparsity check 0.292335569858551 +time 67.83 +58 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7901502.0 +err_fin 5800598.0 +sparsity check 0.29999999489103046 +time 137.85 +58 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6873436.0 +err_fin 5035017.5 +sparsity check 0.29999999489103046 +time 138.24 +58 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 804072.6875 +err_fin 753086.125 +sparsity check 0.29999999489103046 +time 135.76 +59 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 261189.328125 +err_fin 195252.34375 +sparsity check 0.29998879134655 +time 74.96 +59 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 78628.265625 +err_fin 63917.546875 +sparsity check 0.29999983310699463 +time 1.32 +59 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 198882.34375 +err_fin 175707.046875 +sparsity check 0.29999983310699463 +time 1.31 +59 self_attn.o_proj +Pruning ... +0.29765182733535767 0.09765180945396423 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1834356305698859 0.9709505944546686 1.0 +err_prefin 53340.76953125 +err_fin 29668.001953125 +sparsity check 0.29765182733535767 +time 67.83 +59 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8060360.0 +err_fin 5930122.5 +sparsity check 0.29999999489103046 +time 137.81 +59 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7034945.5 +err_fin 5161545.0 +sparsity check 0.29999999489103046 +time 138.11 +59 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 809505.5625 +err_fin 759982.375 +sparsity check 0.29999999489103046 +time 135.83 +60 self_attn.q_proj +Pruning ... +0.29995179176330566 0.09995177388191223 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1907708324184256 0.9709505944546686 1.0 +err_prefin 43336.16015625 +err_fin 32273.69140625 +sparsity check 0.29995179176330566 +time 74.93 +60 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 16123.9609375 +err_fin 13001.099609375 +sparsity check 0.29999983310699463 +time 1.33 +60 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 104462.84375 +err_fin 92991.0859375 +sparsity check 0.29999983310699463 +time 1.31 +60 self_attn.o_proj +Pruning ... +0.2995652109384537 0.09956519305706024 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1895439015964504 0.9709505944546686 1.0 +err_prefin 29143.748046875 +err_fin 17555.71484375 +sparsity check 0.2995652109384537 +time 67.82 +60 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8142654.0 +err_fin 6016490.5 +sparsity check 0.29999999489103046 +time 137.86 +60 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7137665.0 +err_fin 5262851.5 +sparsity check 0.29999999489103046 +time 138.31 +60 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 804617.5625 +err_fin 756961.75 +sparsity check 0.29999999489103046 +time 135.77 +61 self_attn.q_proj +Pruning ... +0.2999650239944458 0.09996500611305237 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908127864263665 0.9709505944546686 1.0 +err_prefin 167471.125 +err_fin 126957.0859375 +sparsity check 0.2999650239944458 +time 74.94 +61 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 54862.91796875 +err_fin 44433.5234375 +sparsity check 0.29999983310699463 +time 1.33 +61 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 176642.21875 +err_fin 159432.40625 +sparsity check 0.29999983310699463 +time 1.33 +61 self_attn.o_proj +Pruning ... +0.296064168214798 0.09606415033340454 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.178321739235102 0.9709505944546686 1.0 +err_prefin 39242.359375 +err_fin 23668.04296875 +sparsity check 0.296064168214798 +time 67.81 +61 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8267901.0 +err_fin 6111480.0 +sparsity check 0.29999999489103046 +time 137.77 +61 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 7285141.0 +err_fin 5375617.0 +sparsity check 0.29999276995658875 +time 138.01 +61 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 813747.75 +err_fin 766580.6875 +sparsity check 0.29999999489103046 +time 135.78 +62 self_attn.q_proj +Pruning ... +0.2999762147665024 0.09997619688510895 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908482656089139 0.9709505944546686 1.0 +err_prefin 183847.09375 +err_fin 138195.8125 +sparsity check 0.2999762147665024 +time 74.91 +62 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 60996.53125 +err_fin 49719.8203125 +sparsity check 0.29999983310699463 +time 1.35 +62 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 195448.65625 +err_fin 173201.8125 +sparsity check 0.29999983310699463 +time 1.31 +62 self_attn.o_proj +Pruning ... +0.2990509569644928 0.09905093908309937 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.187908032693221 0.9709505944546686 1.0 +err_prefin 39808.921875 +err_fin 21340.3515625 +sparsity check 0.2990509569644928 +time 67.81 +62 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8432903.0 +err_fin 6258731.0 +sparsity check 0.29999999489103046 +time 137.74 +62 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 7473114.0 +err_fin 5530597.0 +sparsity check 0.29999276995658875 +time 138.19 +62 mlp.down_proj +Pruning ... +0.2999999906335558 0.19999997317790985 0.242857141154153 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245631764015 0.9709505944546686 1.0 +err_prefin 826638.8125 +err_fin 779265.5 +sparsity check 0.2999999906335558 +time 135.78 +63 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 160645.734375 +err_fin 121673.390625 +sparsity check 0.29998879134655 +time 74.94 +63 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 48778.3046875 +err_fin 39141.375 +sparsity check 0.29999983310699463 +time 1.33 +63 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 157449.4375 +err_fin 137935.078125 +sparsity check 0.29999983310699463 +time 1.31 +63 self_attn.o_proj +Pruning ... +0.29345013201236725 0.09345011413097382 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1698103230969439 0.9709505944546686 1.0 +err_prefin 34115.03515625 +err_fin 20066.1640625 +sparsity check 0.29345013201236725 +time 67.90 +63 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8570918.0 +err_fin 6379026.5 +sparsity check 0.29999999489103046 +time 137.81 +63 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7615606.5 +err_fin 5653985.5 +sparsity check 0.29999999489103046 +time 138.07 +63 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 836566.25 +err_fin 789226.4375 +sparsity check 0.29999999489103046 +time 135.88 +64 self_attn.q_proj +Pruning ... +0.2999767065048218 0.09997668862342834 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908498245682169 0.9709505944546686 1.0 +err_prefin 299505.1875 +err_fin 226632.09375 +sparsity check 0.2999767065048218 +time 74.97 +64 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 93323.4375 +err_fin 74509.125 +sparsity check 0.29999983310699463 +time 1.34 +64 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 216622.78125 +err_fin 194778.28125 +sparsity check 0.29999983310699463 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.2868971824645996 0.08689716458320618 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1479553973166503 0.9709505944546686 1.0 +err_prefin 50887.25 +err_fin 30267.08984375 +sparsity check 0.2868971824645996 +time 67.90 +64 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8715448.0 +err_fin 6496698.0 +sparsity check 0.29999999489103046 +time 137.89 +64 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7766734.0 +err_fin 5779098.5 +sparsity check 0.29999999489103046 +time 138.35 +64 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 851869.125 +err_fin 804173.625 +sparsity check 0.29999999489103046 +time 135.84 +65 self_attn.q_proj +Pruning ... +0.2999642491340637 0.09996423125267029 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908103297375292 0.9709505944546686 1.0 +err_prefin 83586.21875 +err_fin 63058.08984375 +sparsity check 0.2999642491340637 +time 74.97 +65 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 27326.509765625 +err_fin 21336.466796875 +sparsity check 0.29999983310699463 +time 1.35 +65 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 121402.46875 +err_fin 106059.3046875 +sparsity check 0.29999983310699463 +time 1.33 +65 self_attn.o_proj +Pruning ... +0.2976520359516144 0.09765201807022095 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.183436299803295 0.9709505944546686 1.0 +err_prefin 27571.044921875 +err_fin 15242.9150390625 +sparsity check 0.2976520359516144 +time 67.85 +65 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8837253.0 +err_fin 6610702.5 +sparsity check 0.29999999489103046 +time 137.81 +65 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7916858.0 +err_fin 5912370.5 +sparsity check 0.29999999489103046 +time 138.16 +65 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 866969.0 +err_fin 818632.5 +sparsity check 0.29999999489103046 +time 135.84 +66 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 149749.015625 +err_fin 112705.265625 +sparsity check 0.29998789727687836 +time 74.94 +66 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 47971.12109375 +err_fin 37845.2578125 +sparsity check 0.29999983310699463 +time 1.35 +66 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 158214.859375 +err_fin 141883.671875 +sparsity check 0.29999983310699463 +time 1.31 +66 self_attn.o_proj +Pruning ... +0.2893896996974945 0.08938968181610107 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1563574977218163 0.9709505944546686 1.0 +err_prefin 41824.7890625 +err_fin 27087.11328125 +sparsity check 0.2893896996974945 +time 67.91 +66 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9038488.0 +err_fin 6757903.0 +sparsity check 0.29999999489103046 +time 137.76 +66 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8152020.0 +err_fin 6083348.5 +sparsity check 0.29999999489103046 +time 138.23 +66 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 911303.9375 +err_fin 860313.625 +sparsity check 0.29999999489103046 +time 135.85 +67 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 63394.7734375 +err_fin 47541.875 +sparsity check 0.29998789727687836 +time 75.00 +67 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 22216.2890625 +err_fin 16543.91015625 +sparsity check 0.29999983310699463 +time 1.33 +67 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 68837.0859375 +err_fin 57603.5 +sparsity check 0.29999983310699463 +time 1.31 +67 self_attn.o_proj +Pruning ... +0.29094041883945465 0.09094040095806122 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.161529118747347 0.9709505944546686 1.0 +err_prefin 21132.658203125 +err_fin 9523.146484375 +sparsity check 0.29094041883945465 +time 67.92 +67 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9077870.0 +err_fin 6794662.0 +sparsity check 0.29999999489103046 +time 137.93 +67 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8247882.5 +err_fin 6164708.0 +sparsity check 0.29999999489103046 +time 138.18 +67 mlp.down_proj +Pruning ... +0.2999999906335558 0.19999997317790985 0.242857141154153 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245631764015 0.9709505944546686 1.0 +err_prefin 909022.625 +err_fin 857260.1875 +sparsity check 0.2999999906335558 +time 135.98 +68 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 542966.5 +err_fin 412181.25 +sparsity check 0.29998879134655 +time 74.95 +68 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 158593.75 +err_fin 129041.609375 +sparsity check 0.29999983310699463 +time 1.32 +68 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 309721.03125 +err_fin 279954.875 +sparsity check 0.29999983310699463 +time 1.30 +68 self_attn.o_proj +Pruning ... +0.28747063875198364 0.08747062087059021 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1498983780675118 0.9709505944546686 1.0 +err_prefin 41402.9375 +err_fin 26306.3984375 +sparsity check 0.28747063875198364 +time 67.91 +68 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9395424.0 +err_fin 7047702.5 +sparsity check 0.29999999489103046 +time 137.77 +68 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8598284.0 +err_fin 6436147.0 +sparsity check 0.29999999489103046 +time 138.35 +68 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 978026.25 +err_fin 920440.875 +sparsity check 0.29999999489103046 +time 136.08 +69 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 900533.125 +err_fin 684633.9375 +sparsity check 0.29998879134655 +time 75.00 +69 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 286843.1875 +err_fin 234974.984375 +sparsity check 0.29999983310699463 +time 1.31 +69 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 417839.0625 +err_fin 373894.9375 +sparsity check 0.29999983310699463 +time 1.31 +69 self_attn.o_proj +Pruning ... +0.28137916326522827 0.08137914538383484 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1289482802695847 0.9709505944546686 1.0 +err_prefin 89954.125 +err_fin 52598.1875 +sparsity check 0.28137916326522827 +time 67.98 +69 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9581910.0 +err_fin 7180432.0 +sparsity check 0.29999999489103046 +time 137.90 +69 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8861478.0 +err_fin 6631839.0 +sparsity check 0.29999999489103046 +time 138.13 +69 mlp.down_proj +Pruning ... +0.2999999906335558 0.19999997317790985 0.242857141154153 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245631764015 0.9709505944546686 1.0 +err_prefin 1031425.4375 +err_fin 971489.1875 +sparsity check 0.2999999906335558 +time 135.85 +70 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 532644.75 +err_fin 401152.875 +sparsity check 0.29998879134655 +time 74.96 +70 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 161935.875 +err_fin 128223.234375 +sparsity check 0.29999983310699463 +time 1.34 +70 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 246997.28125 +err_fin 218991.875 +sparsity check 0.29999983310699463 +time 1.32 +70 self_attn.o_proj +Pruning ... +0.2764320373535156 0.07643201947212219 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.111408817228582 0.9709505944546686 1.0 +err_prefin 74907.53125 +err_fin 39078.69140625 +sparsity check 0.2764320373535156 +time 67.86 +70 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9785912.0 +err_fin 7322683.0 +sparsity check 0.29999999489103046 +time 137.79 +70 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9113500.0 +err_fin 6808667.5 +sparsity check 0.29999999489103046 +time 138.21 +70 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1106128.5 +err_fin 1040462.5625 +sparsity check 0.29999999489103046 +time 135.72 +71 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 688739.875 +err_fin 523288.21875 +sparsity check 0.29998879134655 +time 74.94 +71 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 215805.96875 +err_fin 174147.234375 +sparsity check 0.29999983310699463 +time 1.33 +71 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 259656.25 +err_fin 231612.75 +sparsity check 0.29999983310699463 +time 1.31 +71 self_attn.o_proj +Pruning ... +0.28335021436214447 0.08335019648075104 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.13580331517402 0.9709505944546686 1.0 +err_prefin 110948.1796875 +err_fin 54004.78125 +sparsity check 0.28335021436214447 +time 67.91 +71 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10049788.0 +err_fin 7501033.5 +sparsity check 0.29999999489103046 +time 137.82 +71 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9385523.0 +err_fin 6993777.0 +sparsity check 0.29999999489103046 +time 138.07 +71 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1151471.5 +err_fin 1078902.25 +sparsity check 0.29999999489103046 +time 135.81 +72 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 951426.125 +err_fin 719770.125 +sparsity check 0.29998879134655 +time 74.94 +72 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 272272.0625 +err_fin 225153.5 +sparsity check 0.29999983310699463 +time 1.32 +72 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 299265.09375 +err_fin 267822.75 +sparsity check 0.29999983310699463 +time 1.31 +72 self_attn.o_proj +Pruning ... +0.28141236305236816 0.08141234517097473 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1290643606091357 0.9709505944546686 1.0 +err_prefin 133972.34375 +err_fin 76113.6328125 +sparsity check 0.28141236305236816 +time 67.90 +72 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10290312.0 +err_fin 7646359.5 +sparsity check 0.29999999489103046 +time 137.79 +72 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9684390.0 +err_fin 7182011.5 +sparsity check 0.29999999489103046 +time 138.19 +72 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1238548.25 +err_fin 1157237.5 +sparsity check 0.29999999489103046 +time 135.74 +73 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 893224.3125 +err_fin 670156.5 +sparsity check 0.29998879134655 +time 74.94 +73 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 259352.890625 +err_fin 212043.390625 +sparsity check 0.29999983310699463 +time 1.33 +73 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 347109.5625 +err_fin 303783.3125 +sparsity check 0.29999983310699463 +time 1.32 +73 self_attn.o_proj +Pruning ... +0.2856079190969467 0.08560790121555328 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1435652362660824 0.9709505944546686 1.0 +err_prefin 83023.703125 +err_fin 46943.09375 +sparsity check 0.2856079190969467 +time 67.88 +73 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10542981.0 +err_fin 7793762.0 +sparsity check 0.29999999489103046 +time 137.81 +73 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9977112.0 +err_fin 7360894.0 +sparsity check 0.29999999489103046 +time 138.11 +73 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1356530.0 +err_fin 1256825.5 +sparsity check 0.29999999489103046 +time 135.71 +74 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 836136.125 +err_fin 612812.75 +sparsity check 0.29999998211860657 +time 74.95 +74 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 243933.1875 +err_fin 190287.96875 +sparsity check 0.29999983310699463 +time 1.31 +74 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 266785.625 +err_fin 229428.234375 +sparsity check 0.29999983310699463 +time 1.31 +74 self_attn.o_proj +Pruning ... +0.2842986732721329 0.08429865539073944 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.139075677794163 0.9709505944546686 1.0 +err_prefin 199612.921875 +err_fin 92505.375 +sparsity check 0.2842986732721329 +time 67.88 +74 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10793855.0 +err_fin 7861494.0 +sparsity check 0.29999999489103046 +time 137.74 +74 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10255294.0 +err_fin 7452481.0 +sparsity check 0.29999999489103046 +time 138.20 +74 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1510742.375 +err_fin 1389251.125 +sparsity check 0.29999999489103046 +time 135.70 +75 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 837856.75 +err_fin 608661.25 +sparsity check 0.29999998211860657 +time 74.92 +75 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 236869.59375 +err_fin 178914.90625 +sparsity check 0.29999983310699463 +time 1.34 +75 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 324162.875 +err_fin 271104.4375 +sparsity check 0.29999983310699463 +time 1.31 +75 self_attn.o_proj +Pruning ... +0.28303997218608856 0.08303995430469513 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.134729242618206 0.9709505944546686 1.0 +err_prefin 205746.5625 +err_fin 94100.859375 +sparsity check 0.28303997218608856 +time 67.90 +75 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10955038.0 +err_fin 7880301.0 +sparsity check 0.29999999489103046 +time 137.81 +75 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10408592.0 +err_fin 7467028.5 +sparsity check 0.29999999489103046 +time 138.02 +75 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1738963.5 +err_fin 1577450.75 +sparsity check 0.29999999489103046 +time 135.80 +76 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 1289686.0 +err_fin 908438.4375 +sparsity check 0.29999998211860657 +time 74.94 +76 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 349882.875 +err_fin 261249.71875 +sparsity check 0.29999983310699463 +time 1.33 +76 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 487619.5625 +err_fin 406323.0 +sparsity check 0.29999983310699463 +time 1.31 +76 self_attn.o_proj +Pruning ... +0.280566543340683 0.08056652545928955 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1261003695482468 0.9709505944546686 1.0 +err_prefin 628972.0 +err_fin 322890.15625 +sparsity check 0.280566543340683 +time 67.90 +76 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10994782.0 +err_fin 7706991.5 +sparsity check 0.29999999489103046 +time 137.78 +76 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10381006.0 +err_fin 7253138.0 +sparsity check 0.29999999489103046 +time 138.17 +76 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2176063.5 +err_fin 1922020.0 +sparsity check 0.29999999489103046 +time 135.67 +77 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 885047.625 +err_fin 596472.5 +sparsity check 0.29999998211860657 +time 74.95 +77 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 256333.953125 +err_fin 182484.203125 +sparsity check 0.29999983310699463 +time 1.34 +77 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 336631.5 +err_fin 273807.1875 +sparsity check 0.29999983310699463 +time 1.31 +77 self_attn.o_proj +Pruning ... +0.28083010017871857 0.08083008229732513 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1270254352798543 0.9709505944546686 1.0 +err_prefin 314081.3125 +err_fin 125044.8125 +sparsity check 0.28083010017871857 +time 67.85 +77 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10395529.0 +err_fin 7067618.0 +sparsity check 0.29999999489103046 +time 137.76 +77 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9835056.0 +err_fin 6655396.0 +sparsity check 0.29999999489103046 +time 138.06 +77 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2710644.5 +err_fin 2303077.5 +sparsity check 0.29999999489103046 +time 135.82 +78 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 872924.625 +err_fin 548241.25 +sparsity check 0.29999998211860657 +time 74.93 +78 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 267820.6875 +err_fin 179371.28125 +sparsity check 0.29999983310699463 +time 1.34 +78 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 289426.4375 +err_fin 240119.75 +sparsity check 0.29999983310699463 +time 1.31 +78 self_attn.o_proj +Pruning ... +0.2920469343662262 0.09204691648483276 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1651936344078773 0.9709505944546686 1.0 +err_prefin 246184.46875 +err_fin 97441.7578125 +sparsity check 0.2920469343662262 +time 67.89 +78 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8960266.0 +err_fin 5861117.5 +sparsity check 0.29999999489103046 +time 137.81 +78 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8408182.0 +err_fin 5463602.0 +sparsity check 0.29999999489103046 +time 138.23 +78 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3032714.0 +err_fin 2369612.25 +sparsity check 0.29999999489103046 +time 135.74 +79 self_attn.q_proj +Pruning ... +0.2999882996082306 0.09998828172683716 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908865770917187 0.9709505944546686 1.0 +err_prefin 587816.625 +err_fin 339601.15625 +sparsity check 0.2999882996082306 +time 74.95 +79 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 199996.71875 +err_fin 134468.171875 +sparsity check 0.29999983310699463 +time 1.33 +79 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 137283.46875 +err_fin 104226.09375 +sparsity check 0.29999983310699463 +time 1.31 +79 self_attn.o_proj +Pruning ... +0.27929268777370453 0.0792926698923111 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1216100825857187 0.9709505944546686 1.0 +err_prefin 71334.6875 +err_fin 20835.970703125 +sparsity check 0.27929268777370453 +time 67.87 +79 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5652304.5 +err_fin 3413151.25 +sparsity check 0.29999999489103046 +time 137.76 +79 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5221667.0 +err_fin 3130273.0 +sparsity check 0.29999999489103046 +time 138.04 +79 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2738567.0 +err_fin 1698428.75 +sparsity check 0.29999999489103046 +time 135.81 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(4.5747e-06) +model.layers.0.self_attn.k_proj.weight tensor(0.0571) +model.layers.0.self_attn.v_proj.weight tensor(0.0995) +model.layers.0.self_attn.o_proj.weight tensor(4.8578e-06) +model.layers.0.mlp.gate_proj.weight tensor(2.8568e-06) +model.layers.0.mlp.up_proj.weight tensor(2.7631e-06) +model.layers.0.mlp.down_proj.weight tensor(0.0548) +50991.72986912727 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 5.992858 diff --git a/logs/llama2-70-0.7-no-final b/logs/llama2-70-0.7-no-final new file mode 100644 index 0000000..25c9081 --- /dev/null +++ b/logs/llama2-70-0.7-no-final @@ -0,0 +1,2897 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 31.5526065826416 +time 74.35 +0 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52.44609832763672 +time 1.29 +0 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 11.234882354736328 +time 1.31 +0 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2.254166603088379 +time 67.06 +0 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 998.2425537109375 +time 132.89 +0 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1007.9627075195312 +time 133.17 +0 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 19.838790893554688 +time 132.32 +1 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 554.335693359375 +time 74.15 +1 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 661.0631103515625 +time 1.30 +1 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 79.96988677978516 +time 1.29 +1 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66.66436767578125 +time 67.07 +1 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7378.185546875 +time 132.96 +1 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 8184.15087890625 +time 133.28 +1 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 406.24114990234375 +time 132.31 +2 self_attn.q_proj +Pruning ... +0.2999999523162842 0.15999996662139893 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482489674002 0.9709505944546686 1.0 +err_prefin 2442.3017578125 +time 74.23 +2 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 2571.81005859375 +time 1.30 +2 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 344.1282043457031 +time 1.31 +2 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 310.08148193359375 +time 67.14 +2 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 31604.423828125 +time 132.88 +2 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 33511.109375 +time 133.27 +2 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1480.284423828125 +time 132.44 +3 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 23244.59765625 +time 74.19 +3 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 14483.0224609375 +time 1.31 +3 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 4647.0556640625 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 546.3053588867188 +time 67.16 +3 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 76256.078125 +time 132.87 +3 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 77684.734375 +time 133.26 +3 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2164.01904296875 +time 132.32 +4 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 48465.7265625 +time 74.34 +4 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 21516.95703125 +time 1.31 +4 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 7692.712890625 +time 1.30 +4 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 732.003662109375 +time 67.19 +4 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 131821.96875 +time 132.93 +4 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 130936.140625 +time 133.32 +4 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3334.408203125 +time 132.42 +5 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 73133.65625 +time 74.19 +5 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 35456.4140625 +time 1.29 +5 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 11244.857421875 +time 1.30 +5 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 1221.337158203125 +time 67.11 +5 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 169625.625 +time 132.92 +5 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 167639.46875 +time 133.29 +5 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4784.5341796875 +time 132.42 +6 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 84363.0546875 +time 74.20 +6 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 39407.2265625 +time 1.32 +6 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 13365.173828125 +time 1.31 +6 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2315.88818359375 +time 67.17 +6 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 231124.15625 +time 133.00 +6 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 226603.40625 +time 133.35 +6 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7224.4072265625 +time 132.42 +7 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 138685.34375 +time 74.33 +7 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 60184.25390625 +time 1.31 +7 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 18989.333984375 +time 1.30 +7 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2784.62060546875 +time 67.25 +7 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 288652.6875 +time 132.94 +7 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 282198.1875 +time 133.24 +7 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 10346.4921875 +time 132.44 +8 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 119998.609375 +time 74.29 +8 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52935.63671875 +time 1.31 +8 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 18440.67578125 +time 1.31 +8 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 4614.5458984375 +time 67.12 +8 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 403003.34375 +time 133.03 +8 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 386727.4375 +time 133.35 +8 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 17268.1328125 +time 132.39 +9 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 132674.0625 +time 74.35 +9 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57647.3515625 +time 1.32 +9 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 27806.244140625 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2570.13818359375 +time 67.29 +9 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 520979.5 +time 133.17 +9 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 496662.4375 +time 133.29 +9 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 18120.21484375 +time 132.37 +10 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 75256.96875 +time 74.34 +10 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 30597.345703125 +time 1.31 +10 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 27144.484375 +time 1.31 +10 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 1852.525146484375 +time 67.26 +10 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 641454.375 +time 133.22 +10 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 608783.8125 +time 133.13 +10 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 19486.072265625 +time 132.70 +11 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 126189.28125 +time 74.34 +11 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 60340.3515625 +time 1.30 +11 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 31537.26171875 +time 1.31 +11 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2960.74169921875 +time 67.27 +11 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 691041.5 +time 133.25 +11 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 658883.5625 +time 133.63 +11 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 21771.5234375 +time 132.76 +12 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 108341.5234375 +time 74.18 +12 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52667.33984375 +time 1.31 +12 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 23476.75390625 +time 1.29 +12 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 3583.69482421875 +time 67.13 +12 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 688974.125 +time 132.90 +12 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 662338.4375 +time 133.62 +12 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 24323.609375 +time 132.64 +13 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 137623.046875 +time 74.21 +13 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 58294.0859375 +time 1.32 +13 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 32058.76953125 +time 1.30 +13 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8978.650390625 +time 67.10 +13 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 729425.375 +time 132.88 +13 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 707805.375 +time 133.27 +13 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 31950.888671875 +time 132.36 +14 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 236391.03125 +time 74.15 +14 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 97577.7109375 +time 1.31 +14 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 44853.12109375 +time 1.30 +14 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11212.888671875 +time 67.11 +14 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 849021.25 +time 132.88 +14 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 823579.5 +time 133.33 +14 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 33598.4609375 +time 132.34 +15 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 167237.703125 +time 74.19 +15 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 67977.171875 +time 1.30 +15 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 55613.015625 +time 1.30 +15 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11331.427734375 +time 67.10 +15 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 908655.125 +time 132.90 +15 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 886018.125 +time 133.28 +15 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 39335.1484375 +time 132.48 +16 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 163885.6875 +time 74.22 +16 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 72068.109375 +time 1.31 +16 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 51465.4609375 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 10284.2802734375 +time 67.15 +16 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 963040.8125 +time 132.96 +16 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 940930.75 +time 133.34 +16 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 39663.0390625 +time 132.44 +17 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 215545.953125 +time 74.21 +17 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 92451.4375 +time 1.31 +17 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 44456.1328125 +time 1.32 +17 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15384.744140625 +time 67.12 +17 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 941519.4375 +time 132.89 +17 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 923326.25 +time 133.25 +17 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 52658.8671875 +time 132.33 +18 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 197980.59375 +time 74.18 +18 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 82839.6640625 +time 1.34 +18 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 69647.796875 +time 1.29 +18 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15146.6953125 +time 67.08 +18 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1038781.875 +time 132.87 +18 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1012503.5 +time 133.23 +18 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 56106.6640625 +time 132.40 +19 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 239990.21875 +time 74.19 +19 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 96670.7578125 +time 1.31 +19 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 83000.84375 +time 1.32 +19 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14134.486328125 +time 67.13 +19 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1127269.5 +time 132.90 +19 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1098608.0 +time 133.28 +19 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 55697.8515625 +time 132.38 +20 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 209576.1875 +time 74.18 +20 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 84032.640625 +time 1.30 +20 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 67712.90625 +time 1.30 +20 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 9953.4306640625 +time 67.11 +20 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1217238.5 +time 132.91 +20 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1188032.5 +time 133.23 +20 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 55471.19140625 +time 132.31 +21 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 122628.21875 +time 74.19 +21 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 48572.55078125 +time 1.31 +21 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57993.25 +time 1.30 +21 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14676.3857421875 +time 67.11 +21 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1264665.5 +time 132.90 +21 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1240083.125 +time 133.30 +21 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 59337.2421875 +time 132.37 +22 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 118583.1796875 +time 74.17 +22 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 54648.171875 +time 1.31 +22 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 66441.0625 +time 1.30 +22 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 10421.3662109375 +time 67.10 +22 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1336592.25 +time 132.87 +22 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1313410.5 +time 133.35 +22 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 60978.33203125 +time 132.01 +23 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 180634.46875 +time 74.17 +23 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 85172.7890625 +time 1.31 +23 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 70221.453125 +time 1.31 +23 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 13395.240234375 +time 67.12 +23 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1378777.25 +time 132.87 +23 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1358027.25 +time 133.23 +23 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 62420.25390625 +time 132.34 +24 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 160180.546875 +time 74.18 +24 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 75499.0546875 +time 1.30 +24 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52861.1875 +time 1.31 +24 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11313.181640625 +time 67.07 +24 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1349332.375 +time 132.84 +24 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1336751.0 +time 133.24 +24 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 63374.6484375 +time 132.39 +25 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 182253.78125 +time 74.23 +25 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 85136.703125 +time 1.30 +25 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 55436.890625 +time 1.29 +25 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 29487.58203125 +time 67.14 +25 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1266213.5 +time 132.94 +25 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1264720.25 +time 133.28 +25 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 92306.71875 +time 132.37 +26 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 278793.5625 +time 74.19 +26 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 123254.59375 +time 1.30 +26 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 83458.3828125 +time 1.31 +26 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 32763.388671875 +time 67.08 +26 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1450715.125 +time 132.99 +26 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1442068.625 +time 133.20 +26 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 89121.875 +time 132.34 +27 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 117377.765625 +time 74.23 +27 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57734.515625 +time 1.31 +27 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 70675.015625 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14198.720703125 +time 67.15 +27 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1539881.75 +time 132.91 +27 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1529547.0 +time 133.27 +27 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 93750.03125 +time 132.36 +28 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 222693.65625 +time 74.22 +28 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 107735.625 +time 1.31 +28 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 108025.09375 +time 1.31 +28 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 22527.2421875 +time 67.13 +28 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1629005.25 +time 132.93 +28 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1618156.125 +time 133.27 +28 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 98853.671875 +time 132.33 +29 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 217407.625 +time 74.21 +29 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 98504.375 +time 1.31 +29 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 117649.53125 +time 1.30 +29 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 27002.08984375 +time 67.17 +29 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1730684.5 +time 132.96 +29 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1715264.5 +time 133.30 +29 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 104639.296875 +time 132.36 +30 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 105451.1015625 +time 74.24 +30 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 41481.4375 +time 1.31 +30 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 107683.2578125 +time 1.31 +30 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15626.08203125 +time 67.09 +30 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1836840.625 +time 132.94 +30 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1815497.375 +time 133.29 +30 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 107850.6328125 +time 132.29 +31 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 226024.875 +time 74.22 +31 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 100023.734375 +time 1.31 +31 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 116832.1328125 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30331.01171875 +time 67.11 +31 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1927874.25 +time 132.94 +31 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1899479.5 +time 133.17 +31 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 109930.75 +time 132.35 +32 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 268912.8125 +time 74.19 +32 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 115952.984375 +time 1.36 +32 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 103949.9375 +time 1.30 +32 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 18538.21875 +time 67.11 +32 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1977070.625 +time 132.95 +32 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1951940.0 +time 133.28 +32 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 115680.390625 +time 132.31 +33 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 389546.34375 +time 74.18 +33 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 156150.109375 +time 1.30 +33 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 115923.796875 +time 1.30 +33 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 38637.20703125 +time 67.10 +33 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1992290.125 +time 133.02 +33 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1972915.0 +time 133.38 +33 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 134200.359375 +time 132.50 +34 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 125157.125 +time 74.22 +34 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 54646.32421875 +time 1.32 +34 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 69443.5625 +time 1.31 +34 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 24770.6875 +time 67.10 +34 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2061498.25 +time 133.07 +34 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2041977.75 +time 133.40 +34 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 146259.5 +time 132.44 +35 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 218872.25 +time 74.16 +35 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 86917.953125 +time 1.30 +35 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 94099.5625 +time 1.31 +35 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 29882.33984375 +time 67.10 +35 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2189425.5 +time 132.93 +35 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2167214.0 +time 133.27 +35 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 155221.28125 +time 132.38 +36 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 209964.21875 +time 74.20 +36 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 86419.625 +time 1.31 +36 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 91525.859375 +time 1.31 +36 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 21488.21484375 +time 67.14 +36 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2239729.5 +time 132.94 +36 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2219749.0 +time 133.27 +36 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 173578.78125 +time 132.33 +37 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 348986.5625 +time 74.19 +37 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 140931.5625 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 116855.0625 +time 1.31 +37 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 35544.62109375 +time 67.10 +37 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2341148.0 +time 132.88 +37 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2321247.0 +time 133.24 +37 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 188810.5625 +time 132.32 +38 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 390541.6875 +time 74.20 +38 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 154272.296875 +time 1.31 +38 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 123844.84375 +time 1.31 +38 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 60033.3515625 +time 67.11 +38 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2411030.0 +time 132.92 +38 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2395395.0 +time 133.25 +38 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 213351.5625 +time 132.31 +39 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 482137.3125 +time 74.18 +39 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 200098.65625 +time 1.31 +39 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 134308.96875 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 65089.78125 +time 67.13 +39 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2431834.0 +time 132.90 +39 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2419592.25 +time 133.23 +39 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 261454.25 +time 132.33 +40 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 447430.53125 +time 74.21 +40 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 174069.1875 +time 1.31 +40 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 153654.140625 +time 1.31 +40 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 94084.90625 +time 67.14 +40 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2520647.75 +time 132.95 +40 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2505940.5 +time 133.27 +40 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 300964.78125 +time 132.32 +41 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 360327.9375 +time 74.23 +41 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137488.0 +time 1.32 +41 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 133586.4375 +time 1.31 +41 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 104387.6484375 +time 67.14 +41 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2610131.0 +time 132.93 +41 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2554464.75 +time 133.25 +41 mlp.down_proj +Pruning ... +0.2999999863760812 0.2499999850988388 0.2285714192049844 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063847936203 0.9709505944546686 1.0 +err_prefin 357945.25 +time 132.38 +42 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 417292.25 +time 74.17 +42 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 149188.53125 +time 1.31 +42 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 164675.15625 +time 1.29 +42 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 94280.3203125 +time 67.08 +42 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2812360.5 +time 132.88 +42 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2692355.75 +time 133.21 +42 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 380780.8125 +time 132.32 +43 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 283651.125 +time 74.18 +43 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 122436.4375 +time 1.31 +43 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 129223.125 +time 1.31 +43 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 76335.546875 +time 67.07 +43 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2922340.0 +time 132.84 +43 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2770022.0 +time 133.23 +43 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 419566.5625 +time 132.38 +44 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 514903.5 +time 74.17 +44 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152951.6875 +time 1.30 +44 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 195854.1875 +time 1.30 +44 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 180027.28125 +time 67.08 +44 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3191728.25 +time 132.89 +44 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2930800.5 +time 133.20 +44 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 490765.28125 +time 132.35 +45 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 321122.6875 +time 74.19 +45 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 128542.78125 +time 1.31 +45 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 190542.5625 +time 1.31 +45 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 56156.19140625 +time 67.12 +45 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3449118.0 +time 132.91 +45 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3128793.5 +time 133.24 +45 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 493217.34375 +time 132.38 +46 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 265631.4375 +time 74.17 +46 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 101115.84375 +time 1.31 +46 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 164556.421875 +time 1.30 +46 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 78668.4375 +time 67.11 +46 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3702983.0 +time 132.93 +46 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3307951.0 +time 133.22 +46 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 491784.375 +time 132.36 +47 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 367896.5 +time 74.18 +47 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 147918.25 +time 1.31 +47 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 162615.296875 +time 1.31 +47 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 107631.9375 +time 67.11 +47 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3811822.25 +time 132.93 +47 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3361974.0 +time 133.23 +47 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 555750.125 +time 132.44 +48 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 174250.34375 +time 74.16 +48 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57852.25 +time 1.29 +48 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 154109.546875 +time 1.30 +48 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 71560.65625 +time 67.05 +48 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3988673.0 +time 132.88 +48 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3501328.0 +time 133.21 +48 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 549560.8125 +time 132.34 +49 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 93208.640625 +time 74.20 +49 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 40360.7890625 +time 1.31 +49 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 125977.703125 +time 1.31 +49 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 25567.244140625 +time 67.11 +49 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4134557.5 +time 132.97 +49 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3626741.0 +time 133.31 +49 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 546702.75 +time 132.34 +50 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 155188.4375 +time 74.16 +50 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52855.34375 +time 1.31 +50 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 160687.25 +time 1.31 +50 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 60433.125 +time 67.07 +50 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4292964.0 +time 132.89 +50 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3749839.0 +time 133.24 +50 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 548676.375 +time 132.35 +51 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 204507.65625 +time 74.21 +51 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 76368.125 +time 1.31 +51 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 166325.875 +time 1.30 +51 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 54188.4140625 +time 67.18 +51 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4411626.0 +time 132.97 +51 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3864618.75 +time 133.27 +51 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 548309.5 +time 132.36 +52 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 345538.46875 +time 74.17 +52 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 97990.2734375 +time 1.30 +52 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 190622.46875 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 97987.203125 +time 67.10 +52 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4509819.5 +time 132.92 +52 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3939153.0 +time 133.25 +52 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 557036.75 +time 132.33 +53 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 98797.375 +time 74.23 +53 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 31212.240234375 +time 1.31 +53 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 158531.421875 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 39168.52734375 +time 67.17 +53 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4629908.5 +time 132.96 +53 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4053844.5 +time 133.28 +53 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 563715.5625 +time 132.33 +54 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 116401.71875 +time 74.17 +54 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 36323.578125 +time 1.31 +54 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 163942.734375 +time 1.29 +54 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 49682.015625 +time 67.09 +54 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4739122.0 +time 132.88 +54 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4161110.75 +time 133.22 +54 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 561708.5625 +time 132.35 +55 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 193069.96875 +time 74.20 +55 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68852.84375 +time 1.31 +55 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 215138.125 +time 1.31 +55 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 93487.796875 +time 67.11 +55 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4823177.0 +time 132.93 +55 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4263465.0 +time 133.26 +55 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 574455.0 +time 132.33 +56 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 145411.03125 +time 74.18 +56 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 54584.6484375 +time 1.29 +56 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 151472.375 +time 1.31 +56 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 45811.046875 +time 67.06 +56 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4864284.0 +time 132.86 +56 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4313716.0 +time 133.22 +56 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 577074.625 +time 132.33 +57 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 126728.4453125 +time 74.17 +57 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 42517.40625 +time 1.30 +57 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 192418.0625 +time 1.29 +57 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 51367.7734375 +time 67.06 +57 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5038693.5 +time 132.87 +57 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4473327.5 +time 133.23 +57 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 587364.125 +time 132.36 +58 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66652.5625 +time 74.17 +58 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 21538.62109375 +time 1.30 +58 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137860.5625 +time 1.30 +58 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 27888.09765625 +time 67.08 +58 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5136027.0 +time 132.87 +58 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4568492.0 +time 133.21 +58 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 588803.625 +time 132.34 +59 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 88454.71875 +time 74.19 +59 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 26870.552734375 +time 1.30 +59 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152944.90625 +time 1.30 +59 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 27917.62109375 +time 67.11 +59 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5246179.0 +time 132.91 +59 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4677478.0 +time 133.16 +59 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 587390.625 +time 132.32 +60 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 12727.052734375 +time 74.19 +60 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 3512.98291015625 +time 1.30 +60 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 82602.203125 +time 1.30 +60 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14933.5986328125 +time 67.15 +60 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5304410.5 +time 132.89 +60 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4751317.5 +time 133.22 +60 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 578973.5 +time 132.35 +61 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 54048.3671875 +time 74.19 +61 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 16042.8564453125 +time 1.31 +61 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 139960.9375 +time 1.31 +61 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 24920.244140625 +time 67.13 +61 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5398592.0 +time 132.95 +61 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4856495.0 +time 133.32 +61 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 580411.625 +time 132.49 +62 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 57423.78515625 +time 74.17 +62 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 19240.78515625 +time 1.30 +62 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 151660.359375 +time 1.30 +62 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 19601.64453125 +time 67.10 +62 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5514424.0 +time 132.88 +62 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4977589.5 +time 133.25 +62 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 585774.5 +time 132.41 +63 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 53042.68359375 +time 74.20 +63 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 14410.1376953125 +time 1.32 +63 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 118978.71875 +time 1.31 +63 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 19115.703125 +time 67.15 +63 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5625384.5 +time 133.00 +63 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5088692.0 +time 133.37 +63 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 590734.375 +time 132.40 +64 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 102080.15625 +time 74.19 +64 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 33424.9921875 +time 1.31 +64 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 170511.25 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 33364.4765625 +time 67.20 +64 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5727166.0 +time 132.94 +64 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5197972.0 +time 133.32 +64 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 598185.0625 +time 132.34 +65 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 26113.30859375 +time 74.18 +65 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 6471.1845703125 +time 1.32 +65 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 94210.6796875 +time 1.31 +65 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 13062.37109375 +time 67.11 +65 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5807544.0 +time 132.92 +65 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5295453.5 +time 133.40 +65 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 605141.4375 +time 132.31 +66 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 43937.9921875 +time 74.18 +66 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 12795.828125 +time 1.30 +66 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 124740.25 +time 1.31 +66 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30385.8828125 +time 67.13 +66 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5949620.5 +time 132.90 +66 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5456178.0 +time 133.30 +66 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 634897.25 +time 132.53 +67 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 21512.400390625 +time 74.19 +67 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 5507.5830078125 +time 1.30 +67 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 51246.125 +time 1.31 +67 self_attn.o_proj +Pruning ... +0.2999999523162842 0.15999996662139893 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482489674002 0.9709505944546686 1.0 +err_prefin 9366.80078125 +time 67.09 +67 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5975366.0 +time 132.92 +67 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5516490.0 +time 133.17 +67 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 629174.375 +time 132.43 +68 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 204316.28125 +time 74.17 +68 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 74044.640625 +time 1.30 +68 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 242222.0 +time 1.30 +68 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 32686.236328125 +time 67.12 +68 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6216001.5 +time 132.90 +68 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5770339.0 +time 133.28 +68 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 681102.875 +time 132.36 +69 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 363704.0 +time 74.20 +69 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 162265.1875 +time 1.31 +69 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 323970.0625 +time 1.30 +69 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 54411.875 +time 67.11 +69 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6338971.5 +time 132.92 +69 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5945954.0 +time 133.28 +69 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 715812.875 +time 132.29 +70 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 198353.359375 +time 74.19 +70 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 75639.3203125 +time 1.31 +70 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 184515.90625 +time 1.30 +70 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 40008.4765625 +time 67.14 +70 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6486191.0 +time 132.90 +70 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6113921.0 +time 133.38 +70 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 766465.3125 +time 132.32 +71 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 282243.5625 +time 74.20 +71 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 123029.546875 +time 1.33 +71 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 198026.59375 +time 1.30 +71 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 52743.42578125 +time 67.14 +71 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6685019.0 +time 132.92 +71 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6310667.0 +time 133.39 +71 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 808048.375 +time 132.28 +72 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 404575.75 +time 74.17 +72 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 169575.328125 +time 1.30 +72 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 231400.0625 +time 1.31 +72 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 68200.359375 +time 67.10 +72 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6837654.0 +time 132.86 +72 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6498931.5 +time 133.22 +72 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 871159.5625 +time 132.30 +73 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 376025.4375 +time 74.18 +73 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 164598.84375 +time 1.35 +73 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 263015.8125 +time 1.31 +73 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 44399.3984375 +time 67.10 +73 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7012628.0 +time 132.86 +73 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6692871.0 +time 133.33 +73 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 966676.9375 +time 132.32 +74 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 339780.125 +time 74.20 +74 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 141210.046875 +time 1.31 +74 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 202251.15625 +time 1.31 +74 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 94332.234375 +time 67.12 +74 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7191236.5 +time 132.92 +74 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6890780.0 +time 133.32 +74 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1077826.5 +time 132.38 +75 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 348923.96875 +time 74.24 +75 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137854.59375 +time 1.36 +75 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 232526.171875 +time 1.31 +75 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 93175.078125 +time 67.16 +75 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7260220.0 +time 132.95 +75 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6948571.0 +time 133.31 +75 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1219008.25 +time 132.43 +76 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 532576.3125 +time 74.21 +76 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 204126.5 +time 1.30 +76 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 374299.78125 +time 1.31 +76 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 283803.6875 +time 67.14 +76 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7205197.5 +time 132.96 +76 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6847868.0 +time 133.29 +76 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1495172.0 +time 132.36 +77 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 353864.9375 +time 74.25 +77 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 145092.859375 +time 1.32 +77 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 250922.46875 +time 1.31 +77 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 156918.4375 +time 67.11 +77 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6709413.0 +time 132.94 +77 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6380281.0 +time 133.21 +77 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1729246.5 +time 132.35 +78 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 316572.75 +time 74.20 +78 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 140766.8125 +time 1.31 +78 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 217920.3125 +time 1.31 +78 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 89117.1328125 +time 67.17 +78 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5597530.5 +time 132.94 +78 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5267583.0 +time 133.30 +78 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1796634.125 +time 132.43 +79 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 199077.828125 +time 74.16 +79 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 109701.8359375 +time 1.31 +79 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 96453.1171875 +time 1.29 +79 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 26878.125 +time 67.10 +79 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3397765.25 +time 132.89 +79 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3107865.0 +time 133.36 +79 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1388730.375 +time 132.38 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0139) +model.layers.0.self_attn.k_proj.weight tensor(0.0296) +model.layers.0.self_attn.v_proj.weight tensor(0.0791) +model.layers.0.self_attn.o_proj.weight tensor(4.2617e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0185) +49798.816762685776 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 4.587645 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..48ea9c4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,57 @@ +aiohappyeyeballs==2.4.0 +aiohttp==3.10.5 +aiosignal==1.3.1 +attrs==24.2.0 +certifi==2024.8.30 +charset-normalizer==3.3.2 +datasets==2.16.1 +dill==0.3.7 +filelock==3.16.1 +frozenlist==1.4.1 +fsspec==2023.10.0 +huggingface-hub==0.25.1 +idna==3.10 +Jinja2==3.1.4 +MarkupSafe==2.1.5 +mpmath==1.3.0 +multidict==6.1.0 +multiprocess==0.70.15 +networkx==3.3 +numpy==2.1.1 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.6.68 +nvidia-nvtx-cu12==12.1.105 +packaging==24.1 +pandas==2.2.3 +protobuf==5.28.2 +pyarrow==17.0.0 +pyarrow-hotfix==0.6 +python-dateutil==2.9.0.post0 +pytz==2024.2 +PyYAML==6.0.2 +regex==2024.9.11 +requests==2.32.3 +safetensors==0.4.5 +sentencepiece==0.2.0 +setuptools==75.1.0 +six==1.16.0 +sympy==1.13.3 +tokenizers==0.15.2 +torch==2.2.1 +tqdm==4.66.5 +transformers==4.35.2 +typing_extensions==4.12.2 +tzdata==2024.2 +urllib3==2.2.3 +wheel==0.44.0 +xxhash==3.5.0 +yarl==1.12.1 diff --git a/sp0.5mask.sh b/sp0.5mask.sh new file mode 100755 index 0000000..f7614f4 --- /dev/null +++ b/sp0.5mask.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.5 --fix-mask | tee logs/llama2-70-0.5-fix-mask; diff --git a/sp0.5nofinal.sh b/sp0.5nofinal.sh new file mode 100755 index 0000000..ca1d1dd --- /dev/null +++ b/sp0.5nofinal.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.5 --no-final | tee logs/llama2-70-0.5-no-final; diff --git a/sp0.6.sh b/sp0.6.sh new file mode 100755 index 0000000..fddd001 --- /dev/null +++ b/sp0.6.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.6 | tee logs/llama2-70-0.6; diff --git a/sp0.6mask.sh b/sp0.6mask.sh new file mode 100755 index 0000000..4c88e97 --- /dev/null +++ b/sp0.6mask.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.6 --fix-mask | tee logs/llama2-70-0.6-fix-mask; diff --git a/sp0.6nofinal.sh b/sp0.6nofinal.sh new file mode 100755 index 0000000..aa770a5 --- /dev/null +++ b/sp0.6nofinal.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.6 --no-final | tee logs/llama2-70-0.6-no-final; diff --git a/sp0.7mask.sh b/sp0.7mask.sh new file mode 100755 index 0000000..80163ea --- /dev/null +++ b/sp0.7mask.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.7 --fix-mask | tee logs/llama2-70-0.7-fix-mask; diff --git a/sp0.7nofinal.sh b/sp0.7nofinal.sh new file mode 100755 index 0000000..ff1e383 --- /dev/null +++ b/sp0.7nofinal.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.7 --no-final | tee logs/llama2-70-0.7-no-final; diff --git a/srun.txt b/srun.txt new file mode 100644 index 0000000..45e342a --- /dev/null +++ b/srun.txt @@ -0,0 +1 @@ +srun -J "cp0.5mask" -c 5 -p gpu -G 1 --account=p487-24-1 --time=2800 --mem=150GB sp0.5mask.sh