From 45f6cbb46f0c9ec7d6c28a1c919595cf0fc16f58 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 13 Oct 2024 22:09:27 +0200 Subject: [PATCH 1/5] Llama linter fix + change model loading --- llama.py | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/llama.py b/llama.py index 7993a5b..8e4eb42 100644 --- a/llama.py +++ b/llama.py @@ -8,6 +8,7 @@ try: import wandb + has_wandb = True except: has_wandb = False @@ -15,20 +16,27 @@ def get_llama(model): import torch + def skip(*args, **kwargs): pass + torch.nn.init.kaiming_uniform_ = skip torch.nn.init.uniform_ = skip torch.nn.init.normal_ = skip from transformers import LlamaForCausalLM - model = LlamaForCausalLM.from_pretrained(model, torch_dtype='auto')#, cache_dir='/scratch/p490-24-t/llamas') + + model = LlamaForCausalLM.from_pretrained( + model, + torch_dtype="auto", + cache_dir="/scratch/p487-24-1/llamas", + ) model.seqlen = model.config.max_position_embeddings return model @torch.no_grad() def llama_sequential(model, dataloader, dev): - print("Starting...") + print(f"Starting... on device {dev}") use_cache = model.config.use_cache model.config.use_cache = False @@ -71,7 +79,6 @@ def forward(self, inp, **kwargs): outs = torch.zeros_like(inps) attention_mask = cache["attention_mask"] - if args.fix_mask: masks = {} for n, p in model.named_parameters(): @@ -82,11 +89,11 @@ def forward(self, inp, **kwargs): dim = shape_key[0] nnz = 0.1 if shape_key[0] == shape_key[1] else 0.2 print(n, p.shape, shape_key, nnz) - A = torch.eye(dim, device="cuda") + A = torch.eye(dim, device="cuda") Arand = torch.rand_like(A) Arand += A * 100 - thres = Arand.abs().flatten().sort()[0][int(A.numel() * (1-nnz))] - masks[shape_key] = (Arand.abs() > thres) + thres = Arand.abs().flatten().sort()[0][int(A.numel() * (1 - nnz))] + masks[shape_key] = Arand.abs() > thres print("Ready.") @@ -114,12 +121,16 @@ def forward(self, inp, **kwargs): not (args.minlayer <= i < args.maxlayer and args.prune_only in name) ) == (not args.invert): continue - + fixmask = None if args.fix_mask: - shape_key = min(subset[name].weight.shape), max(subset[name].weight.shape) + shape_key = min(subset[name].weight.shape), max( + subset[name].weight.shape + ) fixmask = masks[shape_key] - gpts[name] = DoubleSparse(subset[name], nofinal=args.no_final, fixmask=fixmask) + gpts[name] = DoubleSparse( + subset[name], nofinal=args.no_final, fixmask=fixmask + ) def add_batch(name): def tmp(_, inp, out): @@ -162,7 +173,7 @@ def tmp(_, inp, out): @torch.no_grad() -def llama_eval(model, testenc, dev, dataset: str, log_wandb: bool = False): +def llama_eval(model, testenc, dev, dataset: str, log_wandb: bool = False): print("Evaluating ...") testenc = testenc.input_ids @@ -320,9 +331,7 @@ def forward(self, inp, **kwargs): parser.add_argument( "--no-final", action="store_true", help="Do not run the finalizer." ) - parser.add_argument( - "--fix-mask", action="store_true", help="Keep one mask fixed." - ) + parser.add_argument("--fix-mask", action="store_true", help="Keep one mask fixed.") args = parser.parse_args() # init W&B logging @@ -330,11 +339,18 @@ def forward(self, inp, **kwargs): assert has_wandb, "wandb not installed try `pip install wandb`" wandb.init(config=args) + print(f"Running on dev: {DEV}") + print("loading llama") model = get_llama(args.model) + print("llama loaded") model.eval() dataloader, testloader = get_loaders( - args.dataset, nsamples=args.nsamples, seed=args.seed, model=args.model, seqlen=model.seqlen + args.dataset, + nsamples=args.nsamples, + seed=args.seed, + model=args.model, + seqlen=model.seqlen, ) if (args.sparsity or args.prunen) and not args.gmp: @@ -342,7 +358,7 @@ def forward(self, inp, **kwargs): llama_sequential(model, dataloader, DEV) for n, p in model.named_parameters(): print(n, torch.mean((p == 0).float())) - if 'down_proj' in n: + if "down_proj" in n: break print(time.time() - tick) From a0ddb44a00e0d5068079ce133f9542cd3322e2cb Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 13 Oct 2024 22:09:57 +0200 Subject: [PATCH 2/5] Add script for model downloading --- download_models.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 download_models.py diff --git a/download_models.py b/download_models.py new file mode 100644 index 0000000..9e4be06 --- /dev/null +++ b/download_models.py @@ -0,0 +1,13 @@ +from transformers import LlamaForCausalLM + +model_small = "meta-llama/Llama-2-7b-hf" +model_medium = "meta-llama/Llama-2-13b-hf" +model_large = "meta-llama/Llama-2-70b-hf" + + +model = LlamaForCausalLM.from_pretrained( + model_medium, + torch_dtype="auto", + cache_dir="/scratch/p490-24-t/all_llamas", + token="", +) From a484088b074d5b43e863c283892f1f0d91c95b60 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 13 Oct 2024 22:15:18 +0200 Subject: [PATCH 3/5] Add requirements --- requirements.txt | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..48ea9c4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,57 @@ +aiohappyeyeballs==2.4.0 +aiohttp==3.10.5 +aiosignal==1.3.1 +attrs==24.2.0 +certifi==2024.8.30 +charset-normalizer==3.3.2 +datasets==2.16.1 +dill==0.3.7 +filelock==3.16.1 +frozenlist==1.4.1 +fsspec==2023.10.0 +huggingface-hub==0.25.1 +idna==3.10 +Jinja2==3.1.4 +MarkupSafe==2.1.5 +mpmath==1.3.0 +multidict==6.1.0 +multiprocess==0.70.15 +networkx==3.3 +numpy==2.1.1 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.6.68 +nvidia-nvtx-cu12==12.1.105 +packaging==24.1 +pandas==2.2.3 +protobuf==5.28.2 +pyarrow==17.0.0 +pyarrow-hotfix==0.6 +python-dateutil==2.9.0.post0 +pytz==2024.2 +PyYAML==6.0.2 +regex==2024.9.11 +requests==2.32.3 +safetensors==0.4.5 +sentencepiece==0.2.0 +setuptools==75.1.0 +six==1.16.0 +sympy==1.13.3 +tokenizers==0.15.2 +torch==2.2.1 +tqdm==4.66.5 +transformers==4.35.2 +typing_extensions==4.12.2 +tzdata==2024.2 +urllib3==2.2.3 +wheel==0.44.0 +xxhash==3.5.0 +yarl==1.12.1 From d4d699040a5137feab5004acfcbcf7d240f7862a Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 13 Oct 2024 22:16:02 +0200 Subject: [PATCH 4/5] Add slurm run scripts --- sp0.5mask.sh | 11 +++++++++++ sp0.5nofinal.sh | 11 +++++++++++ sp0.6.sh | 11 +++++++++++ sp0.6mask.sh | 11 +++++++++++ sp0.6nofinal.sh | 11 +++++++++++ sp0.7mask.sh | 11 +++++++++++ sp0.7nofinal.sh | 11 +++++++++++ srun.txt | 1 + 8 files changed, 78 insertions(+) create mode 100755 sp0.5mask.sh create mode 100755 sp0.5nofinal.sh create mode 100755 sp0.6.sh create mode 100755 sp0.6mask.sh create mode 100755 sp0.6nofinal.sh create mode 100755 sp0.7mask.sh create mode 100755 sp0.7nofinal.sh create mode 100644 srun.txt diff --git a/sp0.5mask.sh b/sp0.5mask.sh new file mode 100755 index 0000000..f7614f4 --- /dev/null +++ b/sp0.5mask.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.5 --fix-mask | tee logs/llama2-70-0.5-fix-mask; diff --git a/sp0.5nofinal.sh b/sp0.5nofinal.sh new file mode 100755 index 0000000..ca1d1dd --- /dev/null +++ b/sp0.5nofinal.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.5 --no-final | tee logs/llama2-70-0.5-no-final; diff --git a/sp0.6.sh b/sp0.6.sh new file mode 100755 index 0000000..fddd001 --- /dev/null +++ b/sp0.6.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.6 | tee logs/llama2-70-0.6; diff --git a/sp0.6mask.sh b/sp0.6mask.sh new file mode 100755 index 0000000..4c88e97 --- /dev/null +++ b/sp0.6mask.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.6 --fix-mask | tee logs/llama2-70-0.6-fix-mask; diff --git a/sp0.6nofinal.sh b/sp0.6nofinal.sh new file mode 100755 index 0000000..aa770a5 --- /dev/null +++ b/sp0.6nofinal.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.6 --no-final | tee logs/llama2-70-0.6-no-final; diff --git a/sp0.7mask.sh b/sp0.7mask.sh new file mode 100755 index 0000000..80163ea --- /dev/null +++ b/sp0.7mask.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.7 --fix-mask | tee logs/llama2-70-0.7-fix-mask; diff --git a/sp0.7nofinal.sh b/sp0.7nofinal.sh new file mode 100755 index 0000000..ff1e383 --- /dev/null +++ b/sp0.7nofinal.sh @@ -0,0 +1,11 @@ +#!/bin/bash +echo "Launched at $(date)" +echo "Job ID: ${SLURM_JOBID}" +echo "Node list: ${SLURM_NODELIST}" +echo "Submit dir.: ${SLURM_SUBMIT_DIR}" +echo "Numb. of cores: ${SLURM_CPUS_PER_TASK}" +echo $SHELL + +echo "Lets get this party started!" + +python llama.py meta-llama/Llama-2-70b-hf c4 --sparsity 0.7 --no-final | tee logs/llama2-70-0.7-no-final; diff --git a/srun.txt b/srun.txt new file mode 100644 index 0000000..45e342a --- /dev/null +++ b/srun.txt @@ -0,0 +1 @@ +srun -J "cp0.5mask" -c 5 -p gpu -G 1 --account=p487-24-1 --time=2800 --mem=150GB sp0.5mask.sh From 17fc6184501f3dd2b980ad9bc5dd79c271f56ff0 Mon Sep 17 00:00:00 2001 From: vlejd Date: Sun, 13 Oct 2024 22:20:46 +0200 Subject: [PATCH 5/5] Add expriment logs --- logs/llama2-13-0.5 | 2 + logs/llama2-70-0.5 | 4017 ++++++++++++++++++++++++++++++++++ logs/llama2-70-0.5-fix-mask | 4020 +++++++++++++++++++++++++++++++++++ logs/llama2-70-0.5-no-final | 2897 +++++++++++++++++++++++++ logs/llama2-70-0.6 | 4017 ++++++++++++++++++++++++++++++++++ logs/llama2-70-0.6-fix-mask | 4020 +++++++++++++++++++++++++++++++++++ logs/llama2-70-0.6-no-final | 2897 +++++++++++++++++++++++++ logs/llama2-70-0.7 | 4017 ++++++++++++++++++++++++++++++++++ logs/llama2-70-0.7-fix-mask | 4020 +++++++++++++++++++++++++++++++++++ logs/llama2-70-0.7-no-final | 2897 +++++++++++++++++++++++++ 10 files changed, 32804 insertions(+) create mode 100644 logs/llama2-13-0.5 create mode 100644 logs/llama2-70-0.5 create mode 100644 logs/llama2-70-0.5-fix-mask create mode 100644 logs/llama2-70-0.5-no-final create mode 100644 logs/llama2-70-0.6 create mode 100644 logs/llama2-70-0.6-fix-mask create mode 100644 logs/llama2-70-0.6-no-final create mode 100644 logs/llama2-70-0.7 create mode 100644 logs/llama2-70-0.7-fix-mask create mode 100644 logs/llama2-70-0.7-no-final diff --git a/logs/llama2-13-0.5 b/logs/llama2-13-0.5 new file mode 100644 index 0000000..7b55b0e --- /dev/null +++ b/logs/llama2-13-0.5 @@ -0,0 +1,2 @@ +Running on dev: cuda:0 +loading llama diff --git a/logs/llama2-70-0.5 b/logs/llama2-70-0.5 new file mode 100644 index 0000000..3ca622c --- /dev/null +++ b/logs/llama2-70-0.5 @@ -0,0 +1,4017 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10.717611312866211 +err_fin 4.021018981933594 +sparsity check 0.4999999701976776 +time 76.29 +0 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21.521377563476562 +err_fin 7.139569282531738 +sparsity check 0.4999997615814209 +time 1.34 +0 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2.3604230880737305 +err_fin 1.2943761348724365 +sparsity check 0.4999997615814209 +time 1.34 +0 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 0.3077867031097412 +err_fin 0.014042209833860397 +sparsity check 0.4999999701976776 +time 68.94 +0 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232.090087890625 +err_fin 70.949462890625 +sparsity check 0.49999999148505075 +time 138.88 +0 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232.13897705078125 +err_fin 70.3640365600586 +sparsity check 0.49999999148505075 +time 139.19 +0 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4.307583332061768 +err_fin 2.0438151359558105 +sparsity check 0.49999999148505075 +time 136.81 +1 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 202.37362670898438 +err_fin 59.84782791137695 +sparsity check 0.4999999701976776 +time 76.03 +1 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 191.2064666748047 +err_fin 68.62715148925781 +sparsity check 0.4999997615814209 +time 1.34 +1 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17.184864044189453 +err_fin 9.660581588745117 +sparsity check 0.4999997615814209 +time 1.34 +1 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10.127310752868652 +err_fin 1.458266258239746 +sparsity check 0.4999999701976776 +time 68.90 +1 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1592.7952880859375 +err_fin 427.54974365234375 +sparsity check 0.49999999148505075 +time 138.90 +1 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1730.3133544921875 +err_fin 450.73004150390625 +sparsity check 0.49999999148505075 +time 139.15 +1 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 102.43939208984375 +err_fin 80.88983154296875 +sparsity check 0.49999999148505075 +time 136.81 +2 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 689.7213134765625 +err_fin 288.656005859375 +sparsity check 0.4999999701976776 +time 75.94 +2 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 824.6632080078125 +err_fin 409.6326904296875 +sparsity check 0.4999997615814209 +time 1.35 +2 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 83.88253021240234 +err_fin 58.09417724609375 +sparsity check 0.4999997615814209 +time 1.36 +2 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 53.122962951660156 +err_fin 13.704061508178711 +sparsity check 0.4999999701976776 +time 68.95 +2 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 7269.6982421875 +err_fin 2657.683837890625 +sparsity check 0.49999999148505075 +time 138.88 +2 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 7611.2265625 +err_fin 2706.63330078125 +sparsity check 0.49999999148505075 +time 139.19 +2 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 372.63592529296875 +err_fin 314.4751892089844 +sparsity check 0.49999999148505075 +time 136.74 +3 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5286.748046875 +err_fin 2658.657958984375 +sparsity check 0.4999999701976776 +time 75.96 +3 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3639.0576171875 +err_fin 2397.865478515625 +sparsity check 0.4999997615814209 +time 1.36 +3 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1165.04296875 +err_fin 878.8387451171875 +sparsity check 0.4999997615814209 +time 1.35 +3 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 108.81578063964844 +err_fin 44.38947296142578 +sparsity check 0.4999999701976776 +time 68.97 +3 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 18334.98828125 +err_fin 8419.98828125 +sparsity check 0.49999999148505075 +time 138.87 +3 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 18495.703125 +err_fin 8321.044921875 +sparsity check 0.49999999148505075 +time 139.22 +3 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 570.5305786132812 +err_fin 501.5911865234375 +sparsity check 0.49999999148505075 +time 136.73 +4 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11264.912109375 +err_fin 6210.33642578125 +sparsity check 0.4999999701976776 +time 75.97 +4 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5589.6083984375 +err_fin 3927.994384765625 +sparsity check 0.4999997615814209 +time 1.35 +4 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2032.5537109375 +err_fin 1604.776611328125 +sparsity check 0.4999997615814209 +time 1.34 +4 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 128.72828674316406 +err_fin 56.752899169921875 +sparsity check 0.4999999701976776 +time 68.87 +4 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 32660.15234375 +err_fin 17234.7265625 +sparsity check 0.49999999148505075 +time 138.82 +4 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 32144.40625 +err_fin 16705.20703125 +sparsity check 0.49999999148505075 +time 139.15 +4 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 881.4537963867188 +err_fin 788.0779418945312 +sparsity check 0.49999999148505075 +time 136.71 +5 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16228.5498046875 +err_fin 9696.9482421875 +sparsity check 0.4999999701976776 +time 75.96 +5 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9036.0322265625 +err_fin 6727.912109375 +sparsity check 0.4999997615814209 +time 1.36 +5 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2971.5908203125 +err_fin 2443.595458984375 +sparsity check 0.4999997615814209 +time 1.36 +5 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 200.94137573242188 +err_fin 85.52928161621094 +sparsity check 0.4999999701976776 +time 68.92 +5 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 42693.5234375 +err_fin 24387.9921875 +sparsity check 0.49999999148505075 +time 138.83 +5 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 41738.5078125 +err_fin 23544.52734375 +sparsity check 0.49999999148505075 +time 139.19 +5 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1236.692138671875 +err_fin 1100.3291015625 +sparsity check 0.49999999148505075 +time 136.70 +6 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18688.27734375 +err_fin 11589.1328125 +sparsity check 0.4999999701976776 +time 75.97 +6 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10250.5146484375 +err_fin 7875.67724609375 +sparsity check 0.4999997615814209 +time 1.35 +6 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3508.52392578125 +err_fin 2945.929443359375 +sparsity check 0.4999997615814209 +time 1.34 +6 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 397.99334716796875 +err_fin 195.22512817382812 +sparsity check 0.4999999701976776 +time 68.90 +6 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 57998.7265625 +err_fin 34412.84375 +sparsity check 0.49999999148505075 +time 138.86 +6 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 56338.06640625 +err_fin 33092.6171875 +sparsity check 0.49999999148505075 +time 139.17 +6 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1841.37353515625 +err_fin 1657.0572509765625 +sparsity check 0.49999999148505075 +time 136.75 +7 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29829.900390625 +err_fin 19338.62109375 +sparsity check 0.4999999701976776 +time 75.95 +7 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15511.46875 +err_fin 12543.9169921875 +sparsity check 0.4999997615814209 +time 1.34 +7 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5050.56201171875 +err_fin 4345.7109375 +sparsity check 0.4999997615814209 +time 1.34 +7 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 480.6468200683594 +err_fin 224.02084350585938 +sparsity check 0.4999999701976776 +time 69.05 +7 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 73034.484375 +err_fin 45068.453125 +sparsity check 0.49999999148505075 +time 139.08 +7 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 70760.2421875 +err_fin 43282.4140625 +sparsity check 0.49999999148505075 +time 139.45 +7 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 2596.173828125 +err_fin 2334.00634765625 +sparsity check 0.49999999148505075 +time 136.99 +8 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 26124.1953125 +err_fin 17458.955078125 +sparsity check 0.4999999701976776 +time 75.99 +8 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13504.5673828125 +err_fin 10742.255859375 +sparsity check 0.4999997615814209 +time 1.54 +8 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4790.90869140625 +err_fin 4118.7841796875 +sparsity check 0.4999997615814209 +time 1.35 +8 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 817.05517578125 +err_fin 364.8235168457031 +sparsity check 0.4999999701976776 +time 68.93 +8 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 102645.140625 +err_fin 65863.28125 +sparsity check 0.49999999148505075 +time 139.05 +8 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 97679.3203125 +err_fin 62139.15625 +sparsity check 0.49999999148505075 +time 139.36 +8 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4634.08203125 +err_fin 4652.578125 +sparsity check 0.49999999148505075 +time 136.88 +9 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29451.58203125 +err_fin 20870.509765625 +sparsity check 0.4999999701976776 +time 75.93 +9 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15569.3203125 +err_fin 13249.1708984375 +sparsity check 0.4999997615814209 +time 1.37 +9 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7520.92578125 +err_fin 6713.357421875 +sparsity check 0.4999997615814209 +time 1.33 +9 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 462.6151123046875 +err_fin 199.6043701171875 +sparsity check 0.4999999701976776 +time 68.87 +9 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 134011.9375 +err_fin 89886.0 +sparsity check 0.49999999148505075 +time 139.05 +9 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 126603.546875 +err_fin 84247.96875 +sparsity check 0.49999999148505075 +time 139.44 +9 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4630.21630859375 +err_fin 4230.681640625 +sparsity check 0.49999999148505075 +time 136.93 +10 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17889.865234375 +err_fin 12942.79296875 +sparsity check 0.4999999701976776 +time 75.95 +10 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7444.3203125 +err_fin 6123.212890625 +sparsity check 0.4999997615814209 +time 1.35 +10 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7306.6875 +err_fin 6499.71875 +sparsity check 0.4999997615814209 +time 1.35 +10 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 357.70159912109375 +err_fin 163.88455200195312 +sparsity check 0.4999999701976776 +time 68.94 +10 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164816.0625 +err_fin 115543.453125 +sparsity check 0.49999999148505075 +time 139.09 +10 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 155024.53125 +err_fin 107871.078125 +sparsity check 0.49999999148505075 +time 139.41 +10 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 5262.95166015625 +err_fin 4857.232421875 +sparsity check 0.49999999148505075 +time 136.99 +11 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 28585.357421875 +err_fin 21026.21484375 +sparsity check 0.4999999701976776 +time 75.94 +11 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15701.8076171875 +err_fin 13197.6953125 +sparsity check 0.4999997615814209 +time 1.36 +11 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8526.1015625 +err_fin 7540.1748046875 +sparsity check 0.4999997615814209 +time 1.33 +11 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 553.4183959960938 +err_fin 248.75852966308594 +sparsity check 0.4999999701976776 +time 68.86 +11 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 178588.0625 +err_fin 127428.390625 +sparsity check 0.49999999148505075 +time 138.99 +11 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 168528.328125 +err_fin 119484.1875 +sparsity check 0.49999999148505075 +time 139.34 +11 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 5805.6396484375 +err_fin 5372.294921875 +sparsity check 0.49999999148505075 +time 136.90 +12 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 24431.125 +err_fin 17950.859375 +sparsity check 0.4999999701976776 +time 75.95 +12 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13860.294921875 +err_fin 11449.720703125 +sparsity check 0.4999997615814209 +time 1.37 +12 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 6270.4541015625 +err_fin 5479.6904296875 +sparsity check 0.4999997615814209 +time 1.34 +12 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 686.9627075195312 +err_fin 301.6113586425781 +sparsity check 0.4999999701976776 +time 68.94 +12 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 176493.125 +err_fin 126617.734375 +sparsity check 0.49999999148505075 +time 138.89 +12 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 168081.25 +err_fin 119958.796875 +sparsity check 0.49999999148505075 +time 139.24 +12 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 6426.07421875 +err_fin 5913.2431640625 +sparsity check 0.49999999148505075 +time 136.74 +13 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 31198.03125 +err_fin 23000.18359375 +sparsity check 0.4999999701976776 +time 75.98 +13 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14692.3046875 +err_fin 12144.748046875 +sparsity check 0.4999997615814209 +time 1.36 +13 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8522.2490234375 +err_fin 7549.24169921875 +sparsity check 0.4999997615814209 +time 1.34 +13 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1789.514404296875 +err_fin 919.1436767578125 +sparsity check 0.4999999701976776 +time 69.03 +13 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 186214.0 +err_fin 131577.75 +sparsity check 0.49999999148505075 +time 138.90 +13 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 179030.8125 +err_fin 125853.3984375 +sparsity check 0.49999999148505075 +time 139.32 +13 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 8078.4228515625 +err_fin 7425.3779296875 +sparsity check 0.49999999148505075 +time 136.75 +14 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50920.99609375 +err_fin 37752.6328125 +sparsity check 0.4999999701976776 +time 75.98 +14 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25162.892578125 +err_fin 21305.4609375 +sparsity check 0.4999997615814209 +time 1.35 +14 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11922.4052734375 +err_fin 10666.63671875 +sparsity check 0.4999997615814209 +time 1.33 +14 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2095.82421875 +err_fin 990.3251953125 +sparsity check 0.4999999701976776 +time 68.94 +14 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 216393.25 +err_fin 156643.15625 +sparsity check 0.49999999148505075 +time 138.93 +14 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 208099.84375 +err_fin 149852.75 +sparsity check 0.49999999148505075 +time 139.25 +14 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 8740.8046875 +err_fin 8084.6298828125 +sparsity check 0.49999999148505075 +time 136.73 +15 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 37790.51171875 +err_fin 28526.0625 +sparsity check 0.4999999701976776 +time 75.94 +15 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17737.49609375 +err_fin 15115.232421875 +sparsity check 0.4999997615814209 +time 1.35 +15 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14803.548828125 +err_fin 13282.87109375 +sparsity check 0.4999997615814209 +time 1.34 +15 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2026.078125 +err_fin 996.5897216796875 +sparsity check 0.4999999701976776 +time 68.91 +15 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232528.6875 +err_fin 169218.375 +sparsity check 0.49999999148505075 +time 138.86 +15 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 224502.9375 +err_fin 162470.671875 +sparsity check 0.49999999148505075 +time 139.25 +15 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 10293.46875 +err_fin 9477.0576171875 +sparsity check 0.49999999148505075 +time 136.77 +16 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36932.99609375 +err_fin 28156.908203125 +sparsity check 0.4999999701976776 +time 75.97 +16 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18068.9453125 +err_fin 15490.4482421875 +sparsity check 0.4999997615814209 +time 1.36 +16 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13673.15234375 +err_fin 12359.716796875 +sparsity check 0.4999997615814209 +time 1.34 +16 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1736.9384765625 +err_fin 855.127197265625 +sparsity check 0.4999999701976776 +time 68.89 +16 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 245166.546875 +err_fin 180737.34375 +sparsity check 0.49999999148505075 +time 138.89 +16 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 237606.0 +err_fin 174357.09375 +sparsity check 0.49999999148505075 +time 139.21 +16 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 10494.88671875 +err_fin 9731.7978515625 +sparsity check 0.49999999148505075 +time 136.77 +17 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47215.6171875 +err_fin 35621.890625 +sparsity check 0.4999999701976776 +time 76.00 +17 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 24034.9609375 +err_fin 20469.904296875 +sparsity check 0.4999997615814209 +time 1.36 +17 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11634.8115234375 +err_fin 10334.44921875 +sparsity check 0.4999997615814209 +time 1.35 +17 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2806.618896484375 +err_fin 1201.42431640625 +sparsity check 0.4999999701976776 +time 68.98 +17 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 241953.90625 +err_fin 176146.4375 +sparsity check 0.49999999148505075 +time 139.31 +17 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 235002.453125 +err_fin 170181.765625 +sparsity check 0.49999999148505075 +time 139.14 +17 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 13314.861328125 +err_fin 12041.2578125 +sparsity check 0.49999999148505075 +time 136.73 +18 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 44651.109375 +err_fin 33904.859375 +sparsity check 0.4999999701976776 +time 75.95 +18 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20965.923828125 +err_fin 18088.37890625 +sparsity check 0.4999997615814209 +time 1.36 +18 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18601.740234375 +err_fin 16901.2890625 +sparsity check 0.4999997615814209 +time 1.35 +18 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2928.0595703125 +err_fin 1358.638916015625 +sparsity check 0.4999999701976776 +time 69.09 +18 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 268416.65625 +err_fin 196345.8125 +sparsity check 0.49999999148505075 +time 138.90 +18 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 258956.84375 +err_fin 188393.03125 +sparsity check 0.49999999148505075 +time 139.17 +18 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 14657.4140625 +err_fin 13266.8935546875 +sparsity check 0.49999999148505075 +time 136.67 +19 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 53517.2109375 +err_fin 40694.8125 +sparsity check 0.4999999701976776 +time 75.94 +19 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25084.912109375 +err_fin 22073.0234375 +sparsity check 0.4999997615814209 +time 1.39 +19 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 22389.66015625 +err_fin 20554.13671875 +sparsity check 0.4999997615814209 +time 1.38 +19 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2164.009765625 +err_fin 1077.3140869140625 +sparsity check 0.4999999701976776 +time 68.83 +19 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 291777.65625 +err_fin 215023.40625 +sparsity check 0.49999999148505075 +time 138.84 +19 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 282239.6875 +err_fin 206979.453125 +sparsity check 0.49999999148505075 +time 139.14 +19 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 14946.92578125 +err_fin 13706.7490234375 +sparsity check 0.49999999148505075 +time 136.84 +20 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47994.28515625 +err_fin 36983.4296875 +sparsity check 0.4999999701976776 +time 75.99 +20 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21030.80078125 +err_fin 18505.185546875 +sparsity check 0.4999997615814209 +time 1.33 +20 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18498.4921875 +err_fin 17065.44140625 +sparsity check 0.4999997615814209 +time 1.34 +20 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1787.7657470703125 +err_fin 750.0384521484375 +sparsity check 0.4999999701976776 +time 68.86 +20 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 314702.21875 +err_fin 233465.40625 +sparsity check 0.49999999148505075 +time 138.91 +20 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 304739.125 +err_fin 224867.671875 +sparsity check 0.49999999148505075 +time 139.31 +20 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 15340.76171875 +err_fin 14158.5849609375 +sparsity check 0.49999999148505075 +time 136.75 +21 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30180.0546875 +err_fin 23343.21484375 +sparsity check 0.4999999701976776 +time 75.98 +21 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11323.00390625 +err_fin 9676.36328125 +sparsity check 0.4999997615814209 +time 1.57 +21 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15738.5107421875 +err_fin 14382.1328125 +sparsity check 0.4999997615814209 +time 1.34 +21 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2251.770751953125 +err_fin 1092.03955078125 +sparsity check 0.4999999701976776 +time 68.88 +21 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 329349.25 +err_fin 244980.453125 +sparsity check 0.49999999148505075 +time 138.87 +21 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 320610.8125 +err_fin 237296.0 +sparsity check 0.49999999148505075 +time 139.15 +21 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 15934.818359375 +err_fin 14806.017578125 +sparsity check 0.49999999148505075 +time 136.72 +22 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29016.478515625 +err_fin 22520.154296875 +sparsity check 0.4999999701976776 +time 75.99 +22 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13485.953125 +err_fin 11753.7607421875 +sparsity check 0.4999997615814209 +time 1.37 +22 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17941.8984375 +err_fin 16569.421875 +sparsity check 0.4999997615814209 +time 1.33 +22 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1523.11279296875 +err_fin 656.5509033203125 +sparsity check 0.4999999701976776 +time 68.92 +22 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 346120.71875 +err_fin 258356.25 +sparsity check 0.49999999148505075 +time 138.87 +22 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 337648.8125 +err_fin 250853.78125 +sparsity check 0.49999999148505075 +time 139.25 +22 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 16303.919921875 +err_fin 15231.833984375 +sparsity check 0.49999999148505075 +time 136.75 +23 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41636.0234375 +err_fin 32156.23828125 +sparsity check 0.4999999701976776 +time 76.00 +23 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21291.38671875 +err_fin 18641.78125 +sparsity check 0.4999997615814209 +time 1.58 +23 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18855.291015625 +err_fin 17419.83203125 +sparsity check 0.4999997615814209 +time 1.33 +23 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2125.40185546875 +err_fin 998.30517578125 +sparsity check 0.4999999701976776 +time 68.95 +23 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 355178.125 +err_fin 265911.59375 +sparsity check 0.49999999148505075 +time 138.88 +23 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 347161.5625 +err_fin 258682.9375 +sparsity check 0.49999999148505075 +time 138.89 +23 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 16935.09375 +err_fin 15803.7958984375 +sparsity check 0.49999999148505075 +time 136.36 +24 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36693.14453125 +err_fin 28276.1171875 +sparsity check 0.4999999701976776 +time 75.99 +24 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18554.3984375 +err_fin 16096.0712890625 +sparsity check 0.4999997615814209 +time 1.36 +24 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13973.544921875 +err_fin 12703.9609375 +sparsity check 0.4999997615814209 +time 1.34 +24 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2061.354736328125 +err_fin 965.4517211914062 +sparsity check 0.4999999701976776 +time 68.95 +24 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 343603.90625 +err_fin 257796.65625 +sparsity check 0.49999999148505075 +time 138.92 +24 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 338258.9375 +err_fin 252935.359375 +sparsity check 0.49999999148505075 +time 138.93 +24 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 17086.5078125 +err_fin 15960.8994140625 +sparsity check 0.49999999148505075 +time 136.82 +25 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41922.10546875 +err_fin 32402.29296875 +sparsity check 0.4999999701976776 +time 75.96 +25 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21058.921875 +err_fin 18029.76171875 +sparsity check 0.4999997615814209 +time 1.36 +25 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14476.837890625 +err_fin 13038.7958984375 +sparsity check 0.4999997615814209 +time 1.34 +25 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4923.3603515625 +err_fin 2420.1044921875 +sparsity check 0.4999999701976776 +time 68.93 +25 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 323719.625 +err_fin 236912.96875 +sparsity check 0.49999999148505075 +time 138.97 +25 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 320709.0625 +err_fin 233890.0625 +sparsity check 0.49999999148505075 +time 138.99 +25 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 22923.40625 +err_fin 21021.1875 +sparsity check 0.49999999148505075 +time 136.42 +26 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 61113.9140625 +err_fin 46681.4296875 +sparsity check 0.4999999701976776 +time 75.97 +26 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30957.59375 +err_fin 26995.5703125 +sparsity check 0.4999997615814209 +time 1.36 +26 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21901.88671875 +err_fin 19881.6875 +sparsity check 0.4999997615814209 +time 1.34 +26 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5200.109375 +err_fin 2634.781494140625 +sparsity check 0.4999999701976776 +time 68.87 +26 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 369029.5625 +err_fin 272602.0 +sparsity check 0.49999999148505075 +time 138.89 +26 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 364295.34375 +err_fin 268002.59375 +sparsity check 0.49999999148505075 +time 139.16 +26 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 23021.845703125 +err_fin 21498.466796875 +sparsity check 0.49999999148505075 +time 136.74 +27 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 28182.7890625 +err_fin 21998.55859375 +sparsity check 0.4999999701976776 +time 75.96 +27 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13437.1181640625 +err_fin 11674.521484375 +sparsity check 0.4999997615814209 +time 1.36 +27 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19074.416015625 +err_fin 17677.24609375 +sparsity check 0.4999997615814209 +time 1.35 +27 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2304.667724609375 +err_fin 1010.9876098632812 +sparsity check 0.4999999701976776 +time 69.01 +27 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 391651.625 +err_fin 291297.09375 +sparsity check 0.49999999148505075 +time 138.90 +27 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 386030.875 +err_fin 285705.375 +sparsity check 0.49999999148505075 +time 139.21 +27 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 24193.625 +err_fin 22671.08984375 +sparsity check 0.49999999148505075 +time 136.84 +28 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50516.8359375 +err_fin 39330.2578125 +sparsity check 0.4999999701976776 +time 75.96 +28 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26966.166015625 +err_fin 23884.134765625 +sparsity check 0.4999997615814209 +time 1.36 +28 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 28710.95703125 +err_fin 26541.1484375 +sparsity check 0.4999997615814209 +time 1.34 +28 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3617.527587890625 +err_fin 1803.8687744140625 +sparsity check 0.4999999701976776 +time 69.00 +28 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 413293.46875 +err_fin 309840.375 +sparsity check 0.49999999148505075 +time 138.85 +28 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 407833.6875 +err_fin 304104.65625 +sparsity check 0.49999999148505075 +time 139.19 +28 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 25484.3671875 +err_fin 23980.056640625 +sparsity check 0.49999999148505075 +time 136.76 +29 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50058.328125 +err_fin 39188.953125 +sparsity check 0.4999999701976776 +time 75.96 +29 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23116.28515625 +err_fin 20367.78515625 +sparsity check 0.4999997615814209 +time 1.35 +29 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31365.10546875 +err_fin 29057.857421875 +sparsity check 0.4999997615814209 +time 1.34 +29 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4273.2138671875 +err_fin 2122.2392578125 +sparsity check 0.4999999701976776 +time 68.89 +29 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 437865.5625 +err_fin 329919.40625 +sparsity check 0.49999999148505075 +time 138.91 +29 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 430689.4375 +err_fin 322876.5 +sparsity check 0.49999999148505075 +time 139.30 +29 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 26731.2890625 +err_fin 25263.79296875 +sparsity check 0.49999999148505075 +time 136.71 +30 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 26032.23828125 +err_fin 20426.23046875 +sparsity check 0.4999999701976776 +time 75.98 +30 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9992.404296875 +err_fin 8730.005859375 +sparsity check 0.4999997615814209 +time 1.52 +30 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29013.9921875 +err_fin 26994.16015625 +sparsity check 0.4999997615814209 +time 1.34 +30 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2086.343505859375 +err_fin 1034.9847412109375 +sparsity check 0.4999999701976776 +time 68.86 +30 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 464651.03125 +err_fin 352017.46875 +sparsity check 0.49999999148505075 +time 138.94 +30 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 455522.78125 +err_fin 343208.84375 +sparsity check 0.49999999148505075 +time 139.30 +30 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 27662.810546875 +err_fin 26182.16796875 +sparsity check 0.49999999148505075 +time 136.89 +31 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 52242.87890625 +err_fin 41066.94921875 +sparsity check 0.4999999701976776 +time 76.03 +31 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23725.396484375 +err_fin 20942.234375 +sparsity check 0.4999997615814209 +time 1.36 +31 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30603.60546875 +err_fin 28189.00390625 +sparsity check 0.4999997615814209 +time 1.35 +31 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4107.51171875 +err_fin 2171.7705078125 +sparsity check 0.4999999701976776 +time 68.91 +31 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 484873.3125 +err_fin 370143.375 +sparsity check 0.49999999148505075 +time 138.93 +31 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 474644.0625 +err_fin 360518.09375 +sparsity check 0.49999999148505075 +time 139.24 +31 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999850988388 0.4285714200564793 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218755977567 0.9709505944546686 1.0 +err_prefin 28457.56640625 +err_fin 26989.67578125 +sparsity check 0.4999999872275761 +time 136.82 +32 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 59466.06640625 +err_fin 46928.921875 +sparsity check 0.4999999701976776 +time 75.97 +32 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29597.052734375 +err_fin 26552.65234375 +sparsity check 0.4999997615814209 +time 1.62 +32 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 27953.755859375 +err_fin 25990.2265625 +sparsity check 0.4999997615814209 +time 1.34 +32 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2870.14453125 +err_fin 1423.8023681640625 +sparsity check 0.4999999701976776 +time 68.91 +32 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 496590.8125 +err_fin 378051.59375 +sparsity check 0.49999999148505075 +time 138.96 +32 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 487239.75 +err_fin 369091.21875 +sparsity check 0.49999999148505075 +time 139.26 +32 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 30070.58984375 +err_fin 28461.32421875 +sparsity check 0.49999999148505075 +time 136.91 +33 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 83937.4140625 +err_fin 65693.96875 +sparsity check 0.4999999701976776 +time 75.92 +33 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39462.5234375 +err_fin 35009.5546875 +sparsity check 0.4999997615814209 +time 1.35 +33 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30598.1484375 +err_fin 28102.85546875 +sparsity check 0.4999997615814209 +time 1.34 +33 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5600.32373046875 +err_fin 2763.80712890625 +sparsity check 0.4999999701976776 +time 68.86 +33 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 502270.46875 +err_fin 377882.3125 +sparsity check 0.49999999148505075 +time 138.87 +33 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 494249.8125 +err_fin 370007.4375 +sparsity check 0.49999999148505075 +time 139.20 +33 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 34105.328125 +err_fin 32116.03125 +sparsity check 0.49999999148505075 +time 136.67 +34 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30010.17578125 +err_fin 23505.6015625 +sparsity check 0.4999999701976776 +time 75.99 +34 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 12229.2001953125 +err_fin 10336.8369140625 +sparsity check 0.4999997615814209 +time 1.34 +34 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18207.46875 +err_fin 16517.2109375 +sparsity check 0.4999997615814209 +time 1.33 +34 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3952.8564453125 +err_fin 2030.096923828125 +sparsity check 0.4999999701976776 +time 68.91 +34 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 517865.46875 +err_fin 391603.0 +sparsity check 0.49999999148505075 +time 139.05 +34 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 509327.125 +err_fin 383224.625 +sparsity check 0.49999999148505075 +time 139.37 +34 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 37172.03125 +err_fin 35148.7734375 +sparsity check 0.49999999148505075 +time 136.87 +35 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 49801.08203125 +err_fin 39204.73046875 +sparsity check 0.4999999701976776 +time 76.02 +35 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20728.83203125 +err_fin 18156.15625 +sparsity check 0.4999997615814209 +time 1.36 +35 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25123.96484375 +err_fin 23182.5234375 +sparsity check 0.4999997615814209 +time 1.33 +35 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4984.70703125 +err_fin 2389.52978515625 +sparsity check 0.4999999701976776 +time 68.92 +35 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 549537.625 +err_fin 416325.875 +sparsity check 0.49999999148505075 +time 139.09 +35 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 539814.0 +err_fin 406956.34375 +sparsity check 0.49999999148505075 +time 139.37 +35 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 40008.2109375 +err_fin 37887.08203125 +sparsity check 0.49999999148505075 +time 136.92 +36 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47921.19921875 +err_fin 37761.7421875 +sparsity check 0.4999999701976776 +time 76.00 +36 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20009.556640625 +err_fin 17598.63671875 +sparsity check 0.4999997615814209 +time 1.37 +36 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 24585.205078125 +err_fin 22760.03125 +sparsity check 0.4999997615814209 +time 1.33 +36 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3131.942138671875 +err_fin 1448.037353515625 +sparsity check 0.4999999701976776 +time 68.94 +36 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 561929.125 +err_fin 425099.125 +sparsity check 0.49999999148505075 +time 138.92 +36 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 552476.0625 +err_fin 415964.1875 +sparsity check 0.49999999148505075 +time 139.33 +36 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 44609.0078125 +err_fin 42124.6953125 +sparsity check 0.49999999148505075 +time 136.74 +37 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 74911.2265625 +err_fin 58578.91796875 +sparsity check 0.4999999701976776 +time 75.96 +37 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34300.6171875 +err_fin 30461.64453125 +sparsity check 0.4999997615814209 +time 1.37 +37 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31333.7265625 +err_fin 29009.8125 +sparsity check 0.4999997615814209 +time 1.34 +37 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6033.38232421875 +err_fin 2680.28857421875 +sparsity check 0.4999999701976776 +time 68.97 +37 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 586511.5625 +err_fin 440298.9375 +sparsity check 0.49999999148505075 +time 138.95 +37 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 576712.9375 +err_fin 430935.0625 +sparsity check 0.49999999148505075 +time 138.94 +37 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 48698.3515625 +err_fin 45944.84375 +sparsity check 0.49999999148505075 +time 136.38 +38 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 84190.71875 +err_fin 65235.796875 +sparsity check 0.4999999701976776 +time 76.00 +38 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38048.1484375 +err_fin 33385.6640625 +sparsity check 0.4999997615814209 +time 1.37 +38 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 32509.40234375 +err_fin 29642.828125 +sparsity check 0.4999997615814209 +time 1.34 +38 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 9220.7138671875 +err_fin 4203.4375 +sparsity check 0.4999999701976776 +time 68.99 +38 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 603327.75 +err_fin 451749.90625 +sparsity check 0.49999999148505075 +time 138.93 +38 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 594840.375 +err_fin 443057.9375 +sparsity check 0.49999999148505075 +time 139.26 +38 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 54537.9375 +err_fin 51356.5625 +sparsity check 0.49999999148505075 +time 136.76 +39 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 102629.9296875 +err_fin 79059.234375 +sparsity check 0.4999999701976776 +time 75.96 +39 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 51605.64453125 +err_fin 45724.78125 +sparsity check 0.4999997615814209 +time 1.35 +39 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35260.58203125 +err_fin 32227.84375 +sparsity check 0.4999997615814209 +time 1.34 +39 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11472.75390625 +err_fin 4916.529296875 +sparsity check 0.4999999701976776 +time 68.94 +39 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 608882.0625 +err_fin 449760.6875 +sparsity check 0.49999999148505075 +time 138.93 +39 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 600752.25 +err_fin 441877.0 +sparsity check 0.49999999148505075 +time 138.89 +39 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 66603.75 +err_fin 61970.1328125 +sparsity check 0.49999999148505075 +time 136.62 +40 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 95904.0 +err_fin 72537.21875 +sparsity check 0.4999999701976776 +time 75.98 +40 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44928.734375 +err_fin 39576.1484375 +sparsity check 0.4999997615814209 +time 1.36 +40 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39848.4140625 +err_fin 35984.0078125 +sparsity check 0.4999997615814209 +time 1.34 +40 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16308.015625 +err_fin 8537.3857421875 +sparsity check 0.4999999701976776 +time 68.97 +40 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 631623.625 +err_fin 458495.96875 +sparsity check 0.49999999148505075 +time 138.92 +40 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 622759.1875 +err_fin 449699.3125 +sparsity check 0.49999999148505075 +time 139.24 +40 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 74991.9921875 +err_fin 69863.890625 +sparsity check 0.49999999148505075 +time 136.85 +41 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 77544.609375 +err_fin 58233.296875 +sparsity check 0.4999999701976776 +time 75.95 +41 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34891.703125 +err_fin 30307.580078125 +sparsity check 0.4999997615814209 +time 1.51 +41 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35122.8515625 +err_fin 31752.765625 +sparsity check 0.4999997615814209 +time 1.32 +41 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17705.14453125 +err_fin 8125.5283203125 +sparsity check 0.4999999701976776 +time 68.88 +41 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 658906.0 +err_fin 467518.375 +sparsity check 0.49999999148505075 +time 138.89 +41 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 639130.25 +err_fin 450139.3125 +sparsity check 0.49999999148505075 +time 139.26 +41 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 87729.15625 +err_fin 80864.71875 +sparsity check 0.49999999148505075 +time 136.80 +42 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 89561.28125 +err_fin 65998.328125 +sparsity check 0.4999999701976776 +time 75.95 +42 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37875.28125 +err_fin 33061.8125 +sparsity check 0.4999997615814209 +time 1.56 +42 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43199.59375 +err_fin 39259.875 +sparsity check 0.4999997615814209 +time 1.34 +42 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17064.9765625 +err_fin 8395.009765625 +sparsity check 0.4999999701976776 +time 68.94 +42 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 712042.875 +err_fin 499760.4375 +sparsity check 0.49999999148505075 +time 138.91 +42 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 675021.0 +err_fin 470063.28125 +sparsity check 0.49999999148505075 +time 139.32 +42 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 96181.5859375 +err_fin 88354.796875 +sparsity check 0.49999999148505075 +time 136.60 +43 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 62440.1484375 +err_fin 45790.125 +sparsity check 0.4999999701976776 +time 75.95 +43 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30658.6875 +err_fin 26492.27734375 +sparsity check 0.4999997615814209 +time 1.57 +43 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33607.234375 +err_fin 30055.046875 +sparsity check 0.4999997615814209 +time 1.38 +43 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11535.37109375 +err_fin 6193.34130859375 +sparsity check 0.4999999701976776 +time 68.88 +43 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 740046.75 +err_fin 516331.59375 +sparsity check 0.49999999148505075 +time 138.91 +43 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 695492.625 +err_fin 481200.125 +sparsity check 0.49999999148505075 +time 138.88 +43 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 106719.265625 +err_fin 97812.765625 +sparsity check 0.49999999148505075 +time 136.83 +44 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 112646.9375 +err_fin 81647.5625 +sparsity check 0.4999999701976776 +time 75.94 +44 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40573.47265625 +err_fin 35626.34375 +sparsity check 0.4999997615814209 +time 1.49 +44 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 52956.95703125 +err_fin 48484.390625 +sparsity check 0.4999997615814209 +time 1.35 +44 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 27315.21875 +err_fin 14398.6630859375 +sparsity check 0.4999999701976776 +time 68.88 +44 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 811938.8125 +err_fin 557512.125 +sparsity check 0.49999999148505075 +time 138.90 +44 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 737602.0 +err_fin 500840.0625 +sparsity check 0.49999999148505075 +time 139.20 +44 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 124876.71875 +err_fin 112474.78125 +sparsity check 0.49999999148505075 +time 136.65 +45 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 70487.9375 +err_fin 50279.3671875 +sparsity check 0.4999999701976776 +time 76.01 +45 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 32180.87109375 +err_fin 27765.591796875 +sparsity check 0.4999997615814209 +time 1.37 +45 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 51670.9296875 +err_fin 47043.04296875 +sparsity check 0.4999997615814209 +time 1.36 +45 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8514.8125 +err_fin 4562.552734375 +sparsity check 0.4999999701976776 +time 68.86 +45 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 880760.625 +err_fin 604341.5 +sparsity check 0.49999999148505075 +time 138.91 +45 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 790777.375 +err_fin 536047.125 +sparsity check 0.49999999148505075 +time 138.94 +45 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 129625.2890625 +err_fin 116981.046875 +sparsity check 0.49999999148505075 +time 136.81 +46 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 60139.3671875 +err_fin 42844.4375 +sparsity check 0.4999999701976776 +time 75.94 +46 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26271.033203125 +err_fin 22923.193359375 +sparsity check 0.4999997615814209 +time 1.38 +46 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44671.41015625 +err_fin 41058.53125 +sparsity check 0.4999997615814209 +time 1.32 +46 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10887.734375 +err_fin 5889.4287109375 +sparsity check 0.4999999701976776 +time 68.92 +46 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 946446.625 +err_fin 652435.25 +sparsity check 0.49999999148505075 +time 138.85 +46 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 836977.1875 +err_fin 570130.125 +sparsity check 0.49999999148505075 +time 139.17 +46 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 130404.3125 +err_fin 118459.3828125 +sparsity check 0.49999999148505075 +time 136.73 +47 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 81630.78125 +err_fin 58640.24609375 +sparsity check 0.4999999701976776 +time 75.99 +47 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37684.5 +err_fin 32753.998046875 +sparsity check 0.4999997615814209 +time 1.56 +47 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43192.5078125 +err_fin 39257.546875 +sparsity check 0.4999997615814209 +time 1.34 +47 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17707.228515625 +err_fin 7706.35009765625 +sparsity check 0.4999999701976776 +time 69.01 +47 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 982978.0 +err_fin 670679.625 +sparsity check 0.49999999148505075 +time 138.97 +47 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 857838.8125 +err_fin 577950.375 +sparsity check 0.49999999148505075 +time 139.26 +47 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 146335.8125 +err_fin 131420.78125 +sparsity check 0.49999999148505075 +time 136.71 +48 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41655.484375 +err_fin 29628.703125 +sparsity check 0.4999999701976776 +time 75.99 +48 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14035.11328125 +err_fin 11946.5283203125 +sparsity check 0.4999997615814209 +time 1.51 +48 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 42893.5 +err_fin 39142.015625 +sparsity check 0.4999997615814209 +time 1.35 +48 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11646.501953125 +err_fin 6169.419921875 +sparsity check 0.4999999701976776 +time 68.97 +48 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1029321.1875 +err_fin 699118.0 +sparsity check 0.49999999148505075 +time 138.92 +48 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 894293.875 +err_fin 599440.0 +sparsity check 0.49999999148505075 +time 139.21 +48 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 146734.140625 +err_fin 132421.6875 +sparsity check 0.49999999148505075 +time 136.81 +49 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23678.34765625 +err_fin 16803.919921875 +sparsity check 0.4999999701976776 +time 75.93 +49 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9243.3369140625 +err_fin 7814.763671875 +sparsity check 0.4999997615814209 +time 1.34 +49 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34349.76953125 +err_fin 30772.12890625 +sparsity check 0.4999997615814209 +time 1.34 +49 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4523.0859375 +err_fin 2152.568603515625 +sparsity check 0.4999999701976776 +time 68.84 +49 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1071935.5 +err_fin 728369.125 +sparsity check 0.49999999148505075 +time 138.89 +49 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 929909.875 +err_fin 623808.25 +sparsity check 0.49999999148505075 +time 139.17 +49 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 147430.875 +err_fin 133642.25 +sparsity check 0.49999999148505075 +time 136.77 +50 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 37368.7265625 +err_fin 26441.6953125 +sparsity check 0.4999999701976776 +time 76.00 +50 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 12596.9892578125 +err_fin 10804.3369140625 +sparsity check 0.4999997615814209 +time 1.52 +50 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44531.25 +err_fin 40123.015625 +sparsity check 0.4999997615814209 +time 1.34 +50 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8514.12890625 +err_fin 4372.64111328125 +sparsity check 0.4999999701976776 +time 68.95 +50 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1113480.625 +err_fin 758171.3125 +sparsity check 0.49999999148505075 +time 138.95 +50 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 962699.9375 +err_fin 647435.0 +sparsity check 0.49999999148505075 +time 139.22 +50 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148243.84375 +err_fin 134802.15625 +sparsity check 0.49999999148505075 +time 136.84 +51 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47897.08203125 +err_fin 33948.296875 +sparsity check 0.4999999701976776 +time 75.99 +51 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19351.279296875 +err_fin 16782.765625 +sparsity check 0.4999997615814209 +time 1.57 +51 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 46854.2265625 +err_fin 42734.59765625 +sparsity check 0.4999997615814209 +time 1.34 +51 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7749.6162109375 +err_fin 3622.430419921875 +sparsity check 0.4999999701976776 +time 68.89 +51 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1145907.5 +err_fin 782892.875 +sparsity check 0.49999999148505075 +time 138.88 +51 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 993129.375 +err_fin 670268.75 +sparsity check 0.49999999148505075 +time 139.17 +51 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148951.9375 +err_fin 135756.546875 +sparsity check 0.49999999148505075 +time 136.77 +52 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 80784.09375 +err_fin 57475.87890625 +sparsity check 0.4999999701976776 +time 76.02 +52 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25809.490234375 +err_fin 22641.80859375 +sparsity check 0.4999997615814209 +time 1.34 +52 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53536.453125 +err_fin 48914.1953125 +sparsity check 0.4999997615814209 +time 1.33 +52 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15624.708984375 +err_fin 8132.90966796875 +sparsity check 0.4999999701976776 +time 68.91 +52 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1177332.25 +err_fin 808526.6875 +sparsity check 0.49999999148505075 +time 138.95 +52 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1019252.5 +err_fin 691205.625 +sparsity check 0.49999999148505075 +time 139.23 +52 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 152510.71875 +err_fin 139273.8125 +sparsity check 0.49999999148505075 +time 136.73 +53 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 25649.494140625 +err_fin 18411.953125 +sparsity check 0.4999999701976776 +time 75.99 +53 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7535.6396484375 +err_fin 6428.8427734375 +sparsity check 0.4999997615814209 +time 1.37 +53 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44588.1484375 +err_fin 40414.9140625 +sparsity check 0.4999997615814209 +time 1.34 +53 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6829.2353515625 +err_fin 3247.128173828125 +sparsity check 0.4999999701976776 +time 68.89 +53 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1209166.625 +err_fin 829818.75 +sparsity check 0.49999999148505075 +time 138.86 +53 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1049038.875 +err_fin 711064.875 +sparsity check 0.49999999148505075 +time 139.17 +53 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 154057.25 +err_fin 140811.40625 +sparsity check 0.49999999148505075 +time 136.69 +54 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29969.533203125 +err_fin 21441.796875 +sparsity check 0.4999999701976776 +time 75.96 +54 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8616.0869140625 +err_fin 7288.6494140625 +sparsity check 0.4999997615814209 +time 1.36 +54 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 45932.27734375 +err_fin 41775.0546875 +sparsity check 0.4999997615814209 +time 1.34 +54 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7486.509765625 +err_fin 3676.146484375 +sparsity check 0.4999999701976776 +time 68.88 +54 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1239661.625 +err_fin 853401.875 +sparsity check 0.49999999148505075 +time 138.85 +54 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1077716.75 +err_fin 732870.1875 +sparsity check 0.49999999148505075 +time 139.19 +54 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 154116.6875 +err_fin 141300.8125 +sparsity check 0.49999999148505075 +time 136.87 +55 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 48705.125 +err_fin 34947.796875 +sparsity check 0.4999999701976776 +time 75.95 +55 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 16973.052734375 +err_fin 14659.681640625 +sparsity check 0.4999997615814209 +time 1.38 +55 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 59642.6875 +err_fin 54076.9296875 +sparsity check 0.4999997615814209 +time 1.38 +55 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14595.42578125 +err_fin 6802.30859375 +sparsity check 0.4999999701976776 +time 68.94 +55 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1263997.25 +err_fin 872225.25 +sparsity check 0.49999999148505075 +time 138.89 +55 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1107316.75 +err_fin 755348.75 +sparsity check 0.49999999148505075 +time 139.25 +55 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 157797.8125 +err_fin 145139.59375 +sparsity check 0.49999999148505075 +time 136.74 +56 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36475.2578125 +err_fin 26326.166015625 +sparsity check 0.4999999701976776 +time 75.96 +56 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13547.791015625 +err_fin 11655.0546875 +sparsity check 0.4999997615814209 +time 1.36 +56 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 42385.390625 +err_fin 38619.52734375 +sparsity check 0.4999997615814209 +time 1.33 +56 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7914.76416015625 +err_fin 3646.15185546875 +sparsity check 0.4999999701976776 +time 68.93 +56 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1286053.75 +err_fin 887569.0625 +sparsity check 0.49999999148505075 +time 138.89 +56 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1129318.25 +err_fin 770670.5 +sparsity check 0.49999999148505075 +time 139.21 +56 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 160301.1875 +err_fin 147516.8125 +sparsity check 0.49999999148505075 +time 136.81 +57 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 33035.65625 +err_fin 23655.130859375 +sparsity check 0.4999999701976776 +time 75.92 +57 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10187.09765625 +err_fin 8785.1015625 +sparsity check 0.4999997615814209 +time 1.36 +57 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 54517.59375 +err_fin 49678.390625 +sparsity check 0.4999997615814209 +time 1.32 +57 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8301.5009765625 +err_fin 4029.38525390625 +sparsity check 0.4999999701976776 +time 68.92 +57 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1327755.375 +err_fin 918436.375 +sparsity check 0.49999999148505075 +time 138.88 +57 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1167561.625 +err_fin 799117.75 +sparsity check 0.49999999148505075 +time 139.09 +57 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999701976776 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218706168299 0.9709505944546686 1.0 +err_prefin 163075.703125 +err_fin 150484.9375 +sparsity check 0.4999999872275761 +time 136.73 +58 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18900.6484375 +err_fin 13654.41015625 +sparsity check 0.4999999701976776 +time 75.92 +58 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5345.603515625 +err_fin 4459.5361328125 +sparsity check 0.4999997615814209 +time 1.36 +58 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39029.65234375 +err_fin 35217.87109375 +sparsity check 0.4999997615814209 +time 1.35 +58 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5688.50634765625 +err_fin 2834.5869140625 +sparsity check 0.4999999701976776 +time 68.87 +58 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1357102.125 +err_fin 942058.375 +sparsity check 0.49999999148505075 +time 138.87 +58 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1195968.75 +err_fin 821476.5 +sparsity check 0.49999999148505075 +time 139.18 +58 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164241.65625 +err_fin 151835.625 +sparsity check 0.49999999148505075 +time 136.75 +59 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23973.05859375 +err_fin 17286.46875 +sparsity check 0.4999999701976776 +time 75.95 +59 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 6800.9345703125 +err_fin 5795.3671875 +sparsity check 0.4999997615814209 +time 1.36 +59 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43333.12109375 +err_fin 38987.5859375 +sparsity check 0.4999997615814209 +time 1.34 +59 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5351.494140625 +err_fin 2592.425537109375 +sparsity check 0.4999999701976776 +time 68.90 +59 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1390754.75 +err_fin 968292.125 +sparsity check 0.49999999148505075 +time 138.90 +59 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1228030.375 +err_fin 846357.375 +sparsity check 0.49999999148505075 +time 138.88 +59 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164491.46875 +err_fin 152553.453125 +sparsity check 0.49999999148505075 +time 136.36 +60 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4144.17578125 +err_fin 2987.91259765625 +sparsity check 0.4999999701976776 +time 75.95 +60 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1082.2666015625 +err_fin 884.3472900390625 +sparsity check 0.4999997615814209 +time 1.36 +60 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23628.416015625 +err_fin 21147.66015625 +sparsity check 0.4999997615814209 +time 1.33 +60 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3283.666015625 +err_fin 1680.838134765625 +sparsity check 0.4999999701976776 +time 68.87 +60 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1413586.75 +err_fin 988726.5 +sparsity check 0.49999999148505075 +time 138.83 +60 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1253862.875 +err_fin 868783.25 +sparsity check 0.49999999148505075 +time 139.15 +60 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164164.6875 +err_fin 152643.6875 +sparsity check 0.49999999148505075 +time 136.74 +61 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15371.099609375 +err_fin 11208.525390625 +sparsity check 0.4999999701976776 +time 75.96 +61 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4000.73388671875 +err_fin 3378.706787109375 +sparsity check 0.4999997615814209 +time 1.38 +61 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40129.45703125 +err_fin 36481.453125 +sparsity check 0.4999997615814209 +time 1.34 +61 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5390.501953125 +err_fin 2717.38037109375 +sparsity check 0.4999999701976776 +time 68.89 +61 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1440616.0 +err_fin 1010316.6875 +sparsity check 0.49999999148505075 +time 138.89 +61 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1284281.875 +err_fin 891866.625 +sparsity check 0.49999999148505075 +time 138.90 +61 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164991.875 +err_fin 153754.4375 +sparsity check 0.49999999148505075 +time 136.36 +62 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16652.97265625 +err_fin 12181.880859375 +sparsity check 0.4999999701976776 +time 75.96 +62 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4841.4033203125 +err_fin 4092.40771484375 +sparsity check 0.4999997615814209 +time 1.34 +62 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43296.96484375 +err_fin 39043.84375 +sparsity check 0.4999997615814209 +time 1.36 +62 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4511.126953125 +err_fin 2035.7476806640625 +sparsity check 0.4999999701976776 +time 68.90 +62 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1472194.625 +err_fin 1036582.5 +sparsity check 0.49999999148505075 +time 138.91 +62 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1319016.75 +err_fin 919966.0 +sparsity check 0.49999999148505075 +time 139.19 +62 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 166857.40625 +err_fin 155675.09375 +sparsity check 0.49999999148505075 +time 136.79 +63 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15162.9658203125 +err_fin 11185.603515625 +sparsity check 0.4999999701976776 +time 75.93 +63 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3691.4169921875 +err_fin 3106.723388671875 +sparsity check 0.4999997615814209 +time 1.59 +63 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33136.32421875 +err_fin 29420.41796875 +sparsity check 0.4999997615814209 +time 1.37 +63 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3492.001708984375 +err_fin 1740.047119140625 +sparsity check 0.4999999701976776 +time 68.85 +63 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1507286.125 +err_fin 1064654.25 +sparsity check 0.49999999148505075 +time 138.91 +63 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1352012.125 +err_fin 947054.25 +sparsity check 0.49999999148505075 +time 138.84 +63 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 169400.21875 +err_fin 158286.328125 +sparsity check 0.49999999148505075 +time 136.67 +64 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 27923.83984375 +err_fin 20545.359375 +sparsity check 0.4999999701976776 +time 75.98 +64 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8380.58984375 +err_fin 7131.638671875 +sparsity check 0.4999997615814209 +time 1.35 +64 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 49114.46484375 +err_fin 44746.3828125 +sparsity check 0.4999997615814209 +time 1.35 +64 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7756.88671875 +err_fin 3477.064453125 +sparsity check 0.4999999701976776 +time 68.85 +64 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1537956.125 +err_fin 1089393.5 +sparsity check 0.49999999148505075 +time 138.85 +64 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1384552.125 +err_fin 972593.1875 +sparsity check 0.49999999148505075 +time 139.24 +64 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 172489.484375 +err_fin 161310.234375 +sparsity check 0.49999999148505075 +time 136.71 +65 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7846.609375 +err_fin 5812.009765625 +sparsity check 0.4999999701976776 +time 75.97 +65 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1819.07470703125 +err_fin 1485.857177734375 +sparsity check 0.4999997615814209 +time 1.34 +65 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 27047.845703125 +err_fin 24197.2421875 +sparsity check 0.4999997615814209 +time 1.33 +65 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3190.85546875 +err_fin 1521.27197265625 +sparsity check 0.4999999701976776 +time 68.88 +65 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1566413.25 +err_fin 1113701.0 +sparsity check 0.49999999148505075 +time 138.87 +65 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1415837.0 +err_fin 999027.0 +sparsity check 0.49999999148505075 +time 139.19 +65 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 175696.875 +err_fin 164576.796875 +sparsity check 0.49999999148505075 +time 136.83 +66 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 13434.75 +err_fin 9914.8017578125 +sparsity check 0.4999999701976776 +time 76.00 +66 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3435.99365234375 +err_fin 2858.035400390625 +sparsity check 0.4999997615814209 +time 1.34 +66 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35824.453125 +err_fin 32533.126953125 +sparsity check 0.4999997615814209 +time 1.33 +66 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6657.2587890625 +err_fin 3521.1337890625 +sparsity check 0.4999999701976776 +time 68.82 +66 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1604164.5 +err_fin 1140492.0 +sparsity check 0.49999999148505075 +time 138.92 +66 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1458797.75 +err_fin 1029967.8125 +sparsity check 0.49999999148505075 +time 139.29 +66 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 183853.59375 +err_fin 172169.96875 +sparsity check 0.49999999148505075 +time 136.88 +67 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6373.236328125 +err_fin 4719.2001953125 +sparsity check 0.4999999701976776 +time 75.97 +67 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1663.343994140625 +err_fin 1330.06787109375 +sparsity check 0.4999997615814209 +time 1.36 +67 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14204.0703125 +err_fin 12517.884765625 +sparsity check 0.4999997615814209 +time 1.33 +67 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1791.4381103515625 +err_fin 752.0675048828125 +sparsity check 0.4999999701976776 +time 69.02 +67 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1622045.875 +err_fin 1155999.375 +sparsity check 0.49999999148505075 +time 138.94 +67 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1486747.0 +err_fin 1052275.875 +sparsity check 0.49999999148505075 +time 139.22 +67 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 186473.1875 +err_fin 174738.625 +sparsity check 0.49999999148505075 +time 136.88 +68 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 51552.6484375 +err_fin 38091.5234375 +sparsity check 0.4999999701976776 +time 75.94 +68 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19221.5234375 +err_fin 16823.7265625 +sparsity check 0.4999997615814209 +time 1.36 +68 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 71151.9140625 +err_fin 65181.734375 +sparsity check 0.4999997615814209 +time 1.34 +68 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7288.1435546875 +err_fin 3690.9140625 +sparsity check 0.4999999701976776 +time 68.89 +68 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1680152.0 +err_fin 1199584.5 +sparsity check 0.49999999148505075 +time 139.07 +68 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1547915.75 +err_fin 1097996.0 +sparsity check 0.49999999148505075 +time 139.32 +68 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 199138.234375 +err_fin 186279.90625 +sparsity check 0.49999999148505075 +time 136.83 +69 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 86524.171875 +err_fin 64054.6796875 +sparsity check 0.4999999701976776 +time 75.96 +69 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44817.75 +err_fin 39679.12109375 +sparsity check 0.4999997615814209 +time 1.52 +69 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 93110.2578125 +err_fin 84286.015625 +sparsity check 0.4999997615814209 +time 1.35 +69 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11422.0576171875 +err_fin 5443.55712890625 +sparsity check 0.4999999701976776 +time 68.90 +69 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1712167.25 +err_fin 1222631.0 +sparsity check 0.49999999148505075 +time 139.05 +69 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1595281.0 +err_fin 1132071.125 +sparsity check 0.49999999148505075 +time 139.24 +69 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 209077.484375 +err_fin 195700.515625 +sparsity check 0.49999999148505075 +time 136.84 +70 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 49750.33984375 +err_fin 36952.296875 +sparsity check 0.4999999701976776 +time 75.92 +70 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19357.1484375 +err_fin 16941.390625 +sparsity check 0.4999997615814209 +time 1.34 +70 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53286.68359375 +err_fin 48323.54296875 +sparsity check 0.4999997615814209 +time 1.33 +70 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8727.7646484375 +err_fin 4174.57666015625 +sparsity check 0.4999999701976776 +time 68.85 +70 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1752425.25 +err_fin 1251169.75 +sparsity check 0.49999999148505075 +time 138.85 +70 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1640509.25 +err_fin 1165366.25 +sparsity check 0.49999999148505075 +time 139.19 +70 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 223513.625 +err_fin 209054.53125 +sparsity check 0.49999999148505075 +time 136.75 +71 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 67431.9296875 +err_fin 50007.89453125 +sparsity check 0.4999999701976776 +time 75.93 +71 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33746.6015625 +err_fin 29709.49609375 +sparsity check 0.4999997615814209 +time 1.37 +71 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 57293.171875 +err_fin 52068.56640625 +sparsity check 0.4999997615814209 +time 1.34 +71 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11074.923828125 +err_fin 5226.69140625 +sparsity check 0.4999999701976776 +time 68.91 +71 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1807681.625 +err_fin 1288131.25 +sparsity check 0.49999999148505075 +time 138.86 +71 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1697690.5 +err_fin 1203887.5 +sparsity check 0.49999999148505075 +time 139.18 +71 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 237044.9375 +err_fin 221369.625 +sparsity check 0.49999999148505075 +time 136.83 +72 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 93618.8125 +err_fin 69071.015625 +sparsity check 0.4999999701976776 +time 75.95 +72 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 48187.52734375 +err_fin 42756.87890625 +sparsity check 0.4999997615814209 +time 1.36 +72 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 67075.671875 +err_fin 61100.4296875 +sparsity check 0.4999997615814209 +time 1.34 +72 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14306.2294921875 +err_fin 7519.7373046875 +sparsity check 0.4999999701976776 +time 69.02 +72 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1851395.25 +err_fin 1315942.0 +sparsity check 0.49999999148505075 +time 138.87 +72 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1749960.5 +err_fin 1238201.0 +sparsity check 0.49999999148505075 +time 139.24 +72 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 254901.0625 +err_fin 237732.640625 +sparsity check 0.49999999148505075 +time 136.71 +73 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 86980.6171875 +err_fin 63962.2265625 +sparsity check 0.4999999701976776 +time 76.01 +73 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 47429.7578125 +err_fin 42070.00390625 +sparsity check 0.4999997615814209 +time 1.39 +73 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 76576.703125 +err_fin 68824.5703125 +sparsity check 0.4999997615814209 +time 1.33 +73 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 9243.6279296875 +err_fin 4813.7822265625 +sparsity check 0.4999999701976776 +time 68.90 +73 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1896136.5 +err_fin 1343811.0 +sparsity check 0.49999999148505075 +time 138.88 +73 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1800215.875 +err_fin 1270323.25 +sparsity check 0.49999999148505075 +time 139.21 +73 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 280490.6875 +err_fin 260274.953125 +sparsity check 0.49999999148505075 +time 136.73 +74 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 79794.359375 +err_fin 58623.734375 +sparsity check 0.4999999701976776 +time 75.98 +74 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38902.015625 +err_fin 34011.27734375 +sparsity check 0.4999997615814209 +time 1.35 +74 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 57903.9140625 +err_fin 51672.7109375 +sparsity check 0.4999997615814209 +time 1.35 +74 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18365.837890625 +err_fin 8362.603515625 +sparsity check 0.4999999701976776 +time 69.05 +74 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1939999.5 +err_fin 1359626.5 +sparsity check 0.49999999148505075 +time 138.90 +74 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1847464.0 +err_fin 1289308.75 +sparsity check 0.49999999148505075 +time 139.21 +74 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 308198.96875 +err_fin 284229.5625 +sparsity check 0.49999999148505075 +time 136.83 +75 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 80345.03125 +err_fin 58214.7421875 +sparsity check 0.4999999701976776 +time 75.94 +75 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38695.88671875 +err_fin 33634.078125 +sparsity check 0.4999997615814209 +time 1.34 +75 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 65055.8046875 +err_fin 57275.7109375 +sparsity check 0.4999997615814209 +time 1.33 +75 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16365.791015625 +err_fin 7433.798828125 +sparsity check 0.4999999701976776 +time 68.93 +75 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1954902.0 +err_fin 1359252.625 +sparsity check 0.49999999148505075 +time 138.84 +75 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1860311.875 +err_fin 1288227.5 +sparsity check 0.49999999148505075 +time 139.20 +75 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 346329.8125 +err_fin 315204.3125 +sparsity check 0.49999999148505075 +time 136.80 +76 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 120414.59375 +err_fin 85542.703125 +sparsity check 0.4999999701976776 +time 75.95 +76 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 55974.1171875 +err_fin 47984.98046875 +sparsity check 0.4999997615814209 +time 1.38 +76 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 101912.640625 +err_fin 89068.21875 +sparsity check 0.4999997615814209 +time 1.34 +76 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 59227.875 +err_fin 30573.484375 +sparsity check 0.4999999701976776 +time 68.86 +76 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1939324.0 +err_fin 1319095.5 +sparsity check 0.49999999148505075 +time 138.87 +76 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1831292.625 +err_fin 1240294.0 +sparsity check 0.49999999148505075 +time 138.87 +76 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 416353.4375 +err_fin 369301.09375 +sparsity check 0.49999999148505075 +time 136.85 +77 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 79319.734375 +err_fin 55092.390625 +sparsity check 0.4999999701976776 +time 75.92 +77 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39984.7890625 +err_fin 33587.5859375 +sparsity check 0.4999997615814209 +time 1.59 +77 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 68770.40625 +err_fin 58922.96875 +sparsity check 0.4999997615814209 +time 1.33 +77 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23712.392578125 +err_fin 9312.755859375 +sparsity check 0.4999999701976776 +time 68.95 +77 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1806763.25 +err_fin 1191378.0 +sparsity check 0.49999999148505075 +time 138.91 +77 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1707377.125 +err_fin 1120868.5 +sparsity check 0.49999999148505075 +time 139.24 +77 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999850988388 0.4285714200564793 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218755977567 0.9709505944546686 1.0 +err_prefin 478436.125 +err_fin 409042.5 +sparsity check 0.4999999872275761 +time 136.94 +78 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 70711.5 +err_fin 46775.28125 +sparsity check 0.4999999701976776 +time 75.93 +78 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37181.0546875 +err_fin 29953.458984375 +sparsity check 0.4999997615814209 +time 1.37 +78 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 61483.86328125 +err_fin 52091.96875 +sparsity check 0.4999997615814209 +time 1.33 +78 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14208.9140625 +err_fin 6226.3701171875 +sparsity check 0.4999999701976776 +time 68.91 +78 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1509469.75 +err_fin 952065.8125 +sparsity check 0.49999999148505075 +time 138.85 +78 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1410296.25 +err_fin 886377.25 +sparsity check 0.49999999148505075 +time 139.19 +78 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 512088.1875 +err_fin 409127.8125 +sparsity check 0.49999999148505075 +time 136.74 +79 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 44558.09375 +err_fin 27206.3828125 +sparsity check 0.4999999701976776 +time 75.96 +79 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29653.58203125 +err_fin 23229.486328125 +sparsity check 0.4999997615814209 +time 1.38 +79 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26951.17578125 +err_fin 21621.861328125 +sparsity check 0.4999997615814209 +time 1.35 +79 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3706.9072265625 +err_fin 1084.0416259765625 +sparsity check 0.4999999701976776 +time 68.97 +79 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 924403.75 +err_fin 530493.5 +sparsity check 0.49999999148505075 +time 138.87 +79 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 843905.0 +err_fin 485972.6875 +sparsity check 0.49999999148505075 +time 139.17 +79 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 456901.625 +err_fin 296230.0 +sparsity check 0.49999999148505075 +time 136.78 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0083) +model.layers.0.self_attn.k_proj.weight tensor(0.0117) +model.layers.0.self_attn.v_proj.weight tensor(0.0441) +model.layers.0.self_attn.o_proj.weight tensor(3.3528e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0047) +51414.78756856918 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.443130 diff --git a/logs/llama2-70-0.5-fix-mask b/logs/llama2-70-0.5-fix-mask new file mode 100644 index 0000000..990a313 --- /dev/null +++ b/logs/llama2-70-0.5-fix-mask @@ -0,0 +1,4020 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +model.layers.0.self_attn.q_proj.weight torch.Size([8192, 8192]) (8192, 8192) 0.1 +model.layers.0.self_attn.k_proj.weight torch.Size([1024, 8192]) (1024, 8192) 0.2 +model.layers.0.mlp.gate_proj.weight torch.Size([28672, 8192]) (8192, 28672) 0.2 +Ready. +0 self_attn.q_proj +Pruning ... +0.49272334575653076 0.09272335469722748 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.416445870607917 0.9709505944546686 1.0 +err_prefin 34.08806610107422 +err_fin 7.878982067108154 +sparsity check 0.49272334575653076 +time 74.93 +0 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 55.34233474731445 +err_fin 13.895328521728516 +sparsity check 0.49999988079071045 +time 1.30 +0 self_attn.v_proj +Pruning ... +0.4999997615814209 0.1999988555908203 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0884365910305762 0.9709505944546686 1.0 +err_prefin 3.7334351539611816 +err_fin 1.9356110095977783 +sparsity check 0.4999997615814209 +time 1.30 +0 self_attn.o_proj +Pruning ... +0.4912364035844803 0.091236412525177 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4115339808813105 0.9709505944546686 1.0 +err_prefin 1.4021973609924316 +err_fin 0.05483083426952362 +sparsity check 0.4912364035844803 +time 67.76 +0 mlp.gate_proj +Pruning ... +0.49999311566352844 0.1999758630990982 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968090810583558 0.9709505944546686 1.0 +err_prefin 547.6597900390625 +err_fin 144.9074249267578 +sparsity check 0.49999311566352844 +time 137.57 +0 mlp.up_proj +Pruning ... +0.49999311566352844 0.1999758630990982 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968090810583558 0.9709505944546686 1.0 +err_prefin 552.895263671875 +err_fin 144.273193359375 +sparsity check 0.49999311566352844 +time 137.66 +0 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 9.972746849060059 +err_fin 4.442782402038574 +sparsity check 0.49999999574252535 +time 135.36 +1 self_attn.q_proj +Pruning ... +0.49007223546504974 0.09007224440574646 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4076615027389625 0.9709505944546686 1.0 +err_prefin 458.649658203125 +err_fin 89.62268829345703 +sparsity check 0.49007223546504974 +time 74.90 +1 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 545.6114501953125 +err_fin 140.63705444335938 +sparsity check 0.49999988079071045 +time 1.31 +1 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 25.592668533325195 +err_fin 13.595357894897461 +sparsity check 0.49999988079071045 +time 1.30 +1 self_attn.o_proj +Pruning ... +0.4849788695573807 0.08497887849807739 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3904346718109546 0.9709505944546686 1.0 +err_prefin 27.439220428466797 +err_fin 3.2743263244628906 +sparsity check 0.4849788695573807 +time 67.78 +1 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 4326.44580078125 +err_fin 962.2832641601562 +sparsity check 0.49999999574252535 +time 137.43 +1 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 4773.0712890625 +err_fin 1025.191162109375 +sparsity check 0.49999999574252535 +time 137.71 +1 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 185.38607788085938 +err_fin 147.3865509033203 +sparsity check 0.49999999574252535 +time 135.37 +2 self_attn.q_proj +Pruning ... +0.4988190531730652 0.0988190621137619 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4361914811403524 0.9709505944546686 1.0 +err_prefin 1570.6591796875 +err_fin 475.0574951171875 +sparsity check 0.4988190531730652 +time 74.82 +2 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 2552.8427734375 +err_fin 1128.4437255859375 +sparsity check 0.49999988079071045 +time 1.32 +2 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 133.1376190185547 +err_fin 89.30953979492188 +sparsity check 0.49999988079071045 +time 1.31 +2 self_attn.o_proj +Pruning ... +0.49749068915843964 0.09749069809913635 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.431941036871602 0.9709505944546686 1.0 +err_prefin 136.1435546875 +err_fin 31.312829971313477 +sparsity check 0.49749068915843964 +time 67.73 +2 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 17526.8671875 +err_fin 5676.291015625 +sparsity check 0.49999999574252535 +time 137.42 +2 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 18264.2578125 +err_fin 5755.36572265625 +sparsity check 0.49999999574252535 +time 137.61 +2 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 700.30712890625 +err_fin 593.2619018554688 +sparsity check 0.49999999574252535 +time 135.42 +3 self_attn.q_proj +Pruning ... +0.49996353685855865 0.09996354579925537 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398306150793116 0.9709505944546686 1.0 +err_prefin 14849.3662109375 +err_fin 6321.51171875 +sparsity check 0.49996353685855865 +time 74.82 +3 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 10442.033203125 +err_fin 6449.4130859375 +sparsity check 0.49999988079071045 +time 1.33 +3 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 1802.367431640625 +err_fin 1313.1429443359375 +sparsity check 0.49999988079071045 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.4996228814125061 0.09962289035320282 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4387496324175322 0.9709505944546686 1.0 +err_prefin 299.18115234375 +err_fin 110.90554809570312 +sparsity check 0.4996228814125061 +time 67.67 +3 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 39038.39453125 +err_fin 16544.859375 +sparsity check 0.49999999574252535 +time 137.43 +3 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 39166.4375 +err_fin 16271.880859375 +sparsity check 0.49999999574252535 +time 137.72 +3 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 962.98828125 +err_fin 852.9772338867188 +sparsity check 0.49999999574252535 +time 135.38 +4 self_attn.q_proj +Pruning ... +0.49993960559368134 0.09993961453437805 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4397547361857166 0.9709505944546686 1.0 +err_prefin 29343.0859375 +err_fin 14346.1953125 +sparsity check 0.49993960559368134 +time 74.83 +4 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 13446.9453125 +err_fin 8533.0625 +sparsity check 0.49999988079071045 +time 1.32 +4 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 3019.1005859375 +err_fin 2351.447998046875 +sparsity check 0.49999988079071045 +time 1.31 +4 self_attn.o_proj +Pruning ... +0.49829351902008057 0.09829352796077728 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4345133354141861 0.9709505944546686 1.0 +err_prefin 358.5370788574219 +err_fin 145.6417236328125 +sparsity check 0.49829351902008057 +time 67.70 +4 mlp.gate_proj +Pruning ... +0.4999933583395822 0.19997671246528625 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196809566462352 0.9709505944546686 1.0 +err_prefin 64526.4375 +err_fin 32204.076171875 +sparsity check 0.4999933583395822 +time 137.40 +4 mlp.up_proj +Pruning ... +0.499943665095738 0.19980278611183167 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196710130561604 0.9709505944546686 1.0 +err_prefin 63318.7421875 +err_fin 31148.4375 +sparsity check 0.499943665095738 +time 137.70 +4 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1476.088134765625 +err_fin 1328.525146484375 +sparsity check 0.49999999574252535 +time 135.37 +5 self_attn.q_proj +Pruning ... +0.49997615814208984 0.09997616708278656 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398706297009556 0.9709505944546686 1.0 +err_prefin 45802.046875 +err_fin 24824.36328125 +sparsity check 0.49997615814208984 +time 74.82 +5 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 19897.65625 +err_fin 14027.8740234375 +sparsity check 0.49999988079071045 +time 1.32 +5 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 4540.630859375 +err_fin 3649.612548828125 +sparsity check 0.49999988079071045 +time 1.31 +5 self_attn.o_proj +Pruning ... +0.4997566342353821 0.0997566431760788 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4391742849238163 0.9709505944546686 1.0 +err_prefin 603.4962768554688 +err_fin 235.88951110839844 +sparsity check 0.4997566342353821 +time 67.68 +5 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 81626.109375 +err_fin 44528.7421875 +sparsity check 0.49999999574252535 +time 137.42 +5 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 79629.28125 +err_fin 42902.3046875 +sparsity check 0.49999999574252535 +time 137.70 +5 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2072.517578125 +err_fin 1853.76708984375 +sparsity check 0.49999999574252535 +time 135.33 +6 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 52415.0 +err_fin 29954.88671875 +sparsity check 0.4999881684780121 +time 74.80 +6 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 21953.89453125 +err_fin 16100.9296875 +sparsity check 0.49999988079071045 +time 1.31 +6 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 5227.771484375 +err_fin 4343.818359375 +sparsity check 0.49999988079071045 +time 1.31 +6 self_attn.o_proj +Pruning ... +0.4975365102291107 0.09753651916980743 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4320881330342738 0.9709505944546686 1.0 +err_prefin 1031.421142578125 +err_fin 476.410888671875 +sparsity check 0.4975365102291107 +time 67.68 +6 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 108566.34375 +err_fin 61947.36328125 +sparsity check 0.49999999574252535 +time 137.41 +6 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 105072.3828125 +err_fin 59408.34765625 +sparsity check 0.49999999574252535 +time 137.70 +6 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3063.17919921875 +err_fin 2773.345947265625 +sparsity check 0.49999999574252535 +time 135.35 +7 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 86644.46875 +err_fin 52316.3046875 +sparsity check 0.4999881684780121 +time 74.80 +7 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 30040.24609375 +err_fin 23463.05859375 +sparsity check 0.49999988079071045 +time 1.33 +7 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 7565.7626953125 +err_fin 6453.3251953125 +sparsity check 0.49999988079071045 +time 1.30 +7 self_attn.o_proj +Pruning ... +0.4947527199983597 0.0947527289390564 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.42308851870487 0.9709505944546686 1.0 +err_prefin 1310.8568115234375 +err_fin 573.57177734375 +sparsity check 0.4947527199983597 +time 67.72 +7 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 134358.71875 +err_fin 80114.0625 +sparsity check 0.49999999574252535 +time 137.41 +7 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 129736.1796875 +err_fin 76680.21875 +sparsity check 0.49999999574252535 +time 137.71 +7 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 4323.55224609375 +err_fin 3912.0703125 +sparsity check 0.49999999574252535 +time 135.35 +8 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 75884.046875 +err_fin 47366.97265625 +sparsity check 0.4999881684780121 +time 74.81 +8 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 29886.185546875 +err_fin 22489.130859375 +sparsity check 0.49999988079071045 +time 1.33 +8 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 7067.2841796875 +err_fin 6060.041015625 +sparsity check 0.49999988079071045 +time 1.31 +8 self_attn.o_proj +Pruning ... +0.4958852231502533 0.09588523209095001 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4267652832437898 0.9709505944546686 1.0 +err_prefin 2190.83349609375 +err_fin 922.5817260742188 +sparsity check 0.4958852231502533 +time 67.72 +8 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 185066.015625 +err_fin 115194.484375 +sparsity check 0.49999999574252535 +time 137.41 +8 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 174999.25 +err_fin 108095.296875 +sparsity check 0.49999999574252535 +time 137.71 +8 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 8867.8662109375 +err_fin 8053.55859375 +sparsity check 0.49999999574252535 +time 135.32 +9 self_attn.q_proj +Pruning ... +0.4999762624502182 0.09997627139091492 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398709603896664 0.9709505944546686 1.0 +err_prefin 83692.3359375 +err_fin 56241.5546875 +sparsity check 0.4999762624502182 +time 74.82 +9 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 29946.142578125 +err_fin 24662.328125 +sparsity check 0.49999988079071045 +time 1.31 +9 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 10846.4208984375 +err_fin 9679.2373046875 +sparsity check 0.49999988079071045 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.49749529361724854 0.09749530255794525 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4319558197977513 0.9709505944546686 1.0 +err_prefin 1225.5751953125 +err_fin 485.3004455566406 +sparsity check 0.49749529361724854 +time 67.69 +9 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 236431.9375 +err_fin 154139.15625 +sparsity check 0.49999999574252535 +time 137.42 +9 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 222079.5625 +err_fin 143717.671875 +sparsity check 0.49999999574252535 +time 137.62 +9 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 7716.9306640625 +err_fin 7093.3818359375 +sparsity check 0.49999999574252535 +time 135.38 +10 self_attn.q_proj +Pruning ... +0.4999762624502182 0.09997627139091492 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398709603896664 0.9709505944546686 1.0 +err_prefin 50366.90625 +err_fin 34555.5234375 +sparsity check 0.4999762624502182 +time 74.79 +10 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20972.310546875 +err_fin 16446.255859375 +sparsity check 0.49999988079071045 +time 1.32 +10 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 10438.380859375 +err_fin 9284.658203125 +sparsity check 0.49999988079071045 +time 1.31 +10 self_attn.o_proj +Pruning ... +0.48955683410167694 0.08955684304237366 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4059394738501885 0.9709505944546686 1.0 +err_prefin 753.25927734375 +err_fin 324.17095947265625 +sparsity check 0.48955683410167694 +time 67.72 +10 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 286046.5 +err_fin 196094.625 +sparsity check 0.49999999574252535 +time 137.41 +10 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 267715.3125 +err_fin 182304.59375 +sparsity check 0.49999999574252535 +time 137.70 +10 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 8698.220703125 +err_fin 8076.32666015625 +sparsity check 0.49999999574252535 +time 135.36 +11 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 80485.90625 +err_fin 55793.3046875 +sparsity check 0.49998772144317627 +time 74.84 +11 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 33353.53125 +err_fin 26244.560546875 +sparsity check 0.49999988079071045 +time 1.31 +11 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 12479.7666015625 +err_fin 10959.146484375 +sparsity check 0.49999988079071045 +time 1.31 +11 self_attn.o_proj +Pruning ... +0.4854428768157959 0.08544288575649261 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3920235864263013 0.9709505944546686 1.0 +err_prefin 1232.4251708984375 +err_fin 523.7001953125 +sparsity check 0.4854428768157959 +time 67.75 +11 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 307951.625 +err_fin 215541.4375 +sparsity check 0.49999999574252535 +time 137.40 +11 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 289756.1875 +err_fin 201436.40625 +sparsity check 0.49999999574252535 +time 137.69 +11 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 9577.0673828125 +err_fin 8912.2724609375 +sparsity check 0.49999999574252535 +time 135.33 +12 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 68103.78125 +err_fin 47606.0390625 +sparsity check 0.49998772144317627 +time 74.81 +12 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 30560.9453125 +err_fin 23880.765625 +sparsity check 0.49999988079071045 +time 1.31 +12 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 9388.3828125 +err_fin 8111.275390625 +sparsity check 0.49999988079071045 +time 1.30 +12 self_attn.o_proj +Pruning ... +0.49501554667949677 0.09501555562019348 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4239437209889605 0.9709505944546686 1.0 +err_prefin 1512.551025390625 +err_fin 614.376220703125 +sparsity check 0.49501554667949677 +time 67.72 +12 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 304679.71875 +err_fin 214664.71875 +sparsity check 0.49999999574252535 +time 137.40 +12 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 289705.8125 +err_fin 203138.765625 +sparsity check 0.49999999574252535 +time 137.70 +12 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 10629.662109375 +err_fin 9844.0546875 +sparsity check 0.49999999574252535 +time 135.36 +13 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 89871.046875 +err_fin 63092.34765625 +sparsity check 0.49998772144317627 +time 74.97 +13 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 34686.421875 +err_fin 27442.31640625 +sparsity check 0.49999988079071045 +time 1.31 +13 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 12554.0126953125 +err_fin 11048.6611328125 +sparsity check 0.49999988079071045 +time 1.31 +13 self_attn.o_proj +Pruning ... +0.4901005029678345 0.09010051190853119 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4077558129857786 0.9709505944546686 1.0 +err_prefin 3843.938232421875 +err_fin 1864.849365234375 +sparsity check 0.4901005029678345 +time 67.88 +13 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 323137.3125 +err_fin 223854.53125 +sparsity check 0.49999999574252535 +time 137.80 +13 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 310574.3125 +err_fin 213911.125 +sparsity check 0.49999999574252535 +time 138.08 +13 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 13449.08203125 +err_fin 12456.4248046875 +sparsity check 0.49999999574252535 +time 135.74 +14 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 146839.21875 +err_fin 103904.828125 +sparsity check 0.49998772144317627 +time 74.98 +14 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 49520.5625 +err_fin 40365.359375 +sparsity check 0.49999988079071045 +time 1.32 +14 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 17806.49609375 +err_fin 15857.8916015625 +sparsity check 0.49999988079071045 +time 1.31 +14 self_attn.o_proj +Pruning ... +0.49405381083488464 0.09405381977558136 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4208087027364587 0.9709505944546686 1.0 +err_prefin 5030.2236328125 +err_fin 2252.26171875 +sparsity check 0.49405381083488464 +time 67.87 +14 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 372289.125 +err_fin 265223.0 +sparsity check 0.49999999574252535 +time 137.89 +14 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 357630.1875 +err_fin 253357.3125 +sparsity check 0.49999999574252535 +time 138.15 +14 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 14419.9716796875 +err_fin 13439.255859375 +sparsity check 0.49999999574252535 +time 135.75 +15 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 108777.1171875 +err_fin 78836.0546875 +sparsity check 0.49998772144317627 +time 75.11 +15 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 37974.76953125 +err_fin 31038.19140625 +sparsity check 0.49999988079071045 +time 1.32 +15 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 21119.76171875 +err_fin 19116.50390625 +sparsity check 0.49999988079071045 +time 1.32 +15 self_attn.o_proj +Pruning ... +0.4920341372489929 0.09203414618968964 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4141739218068623 0.9709505944546686 1.0 +err_prefin 3777.62158203125 +err_fin 1862.2109375 +sparsity check 0.4920341372489929 +time 68.01 +15 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 401217.625 +err_fin 288907.78125 +sparsity check 0.49999999574252535 +time 138.17 +15 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 387066.1875 +err_fin 277026.03125 +sparsity check 0.49999999574252535 +time 138.33 +15 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 16409.4375 +err_fin 15280.708984375 +sparsity check 0.49999999574252535 +time 136.07 +16 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 105659.078125 +err_fin 77506.4921875 +sparsity check 0.49998772144317627 +time 75.16 +16 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 41239.171875 +err_fin 33963.78515625 +sparsity check 0.49999988079071045 +time 1.31 +16 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 19672.27734375 +err_fin 17797.333984375 +sparsity check 0.49999988079071045 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.49631543457508087 0.09631544351577759 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4281563935764638 0.9709505944546686 1.0 +err_prefin 4274.4638671875 +err_fin 1988.13818359375 +sparsity check 0.49631543457508087 +time 68.03 +16 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 419959.71875 +err_fin 305134.8125 +sparsity check 0.49999999574252535 +time 138.14 +16 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 406808.25 +err_fin 294106.625 +sparsity check 0.49999999574252535 +time 138.42 +16 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 17283.220703125 +err_fin 16127.3623046875 +sparsity check 0.49999999574252535 +time 136.04 +17 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 134928.78125 +err_fin 97584.6640625 +sparsity check 0.49998772144317627 +time 75.12 +17 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 48884.97265625 +err_fin 39521.5078125 +sparsity check 0.49999988079071045 +time 1.34 +17 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 17418.22265625 +err_fin 15397.0830078125 +sparsity check 0.49999988079071045 +time 1.31 +17 self_attn.o_proj +Pruning ... +0.4991466701030731 0.09914667904376984 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.437235368404992 0.9709505944546686 1.0 +err_prefin 7362.8935546875 +err_fin 2987.857421875 +sparsity check 0.4991466701030731 +time 67.96 +17 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 416129.3125 +err_fin 297964.0625 +sparsity check 0.49999999574252535 +time 138.14 +17 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 404107.375 +err_fin 287668.75 +sparsity check 0.49999999574252535 +time 138.30 +17 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 21806.95703125 +err_fin 19919.7734375 +sparsity check 0.49999999574252535 +time 136.05 +18 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 127766.4375 +err_fin 93001.578125 +sparsity check 0.49998772144317627 +time 75.10 +18 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 45825.41796875 +err_fin 37889.796875 +sparsity check 0.49999988079071045 +time 1.32 +18 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26613.5078125 +err_fin 24281.3984375 +sparsity check 0.49999988079071045 +time 1.31 +18 self_attn.o_proj +Pruning ... +0.4893995672464371 0.08939957618713379 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.405413088249908 0.9709505944546686 1.0 +err_prefin 6830.02197265625 +err_fin 2966.24267578125 +sparsity check 0.4893995672464371 +time 68.04 +18 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 459914.1875 +err_fin 330994.0625 +sparsity check 0.49999999574252535 +time 138.13 +18 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 443396.4375 +err_fin 317242.5625 +sparsity check 0.49999999574252535 +time 138.39 +18 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 23930.73046875 +err_fin 21924.87890625 +sparsity check 0.49999999574252535 +time 136.03 +19 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 151214.796875 +err_fin 111182.21875 +sparsity check 0.49998772144317627 +time 75.11 +19 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 49385.66796875 +err_fin 42071.76171875 +sparsity check 0.49999988079071045 +time 1.31 +19 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 31174.37109375 +err_fin 28941.91796875 +sparsity check 0.49999988079071045 +time 1.30 +19 self_attn.o_proj +Pruning ... +0.4876689612865448 0.08766897022724152 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3995914973239965 0.9709505944546686 1.0 +err_prefin 4717.5537109375 +err_fin 2170.08056640625 +sparsity check 0.4876689612865448 +time 68.04 +19 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 499092.0625 +err_fin 361368.1875 +sparsity check 0.49999999574252535 +time 138.15 +19 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 481397.5 +err_fin 346922.6875 +sparsity check 0.49999999574252535 +time 138.34 +19 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 24736.42578125 +err_fin 22886.87890625 +sparsity check 0.49999999574252535 +time 136.05 +20 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 133521.78125 +err_fin 99405.609375 +sparsity check 0.49998772144317627 +time 75.12 +20 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 44213.140625 +err_fin 37228.4921875 +sparsity check 0.49999988079071045 +time 1.31 +20 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 25626.75390625 +err_fin 23838.16015625 +sparsity check 0.49999988079071045 +time 1.31 +20 self_attn.o_proj +Pruning ... +0.4920240491628647 0.0920240581035614 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4141406060638388 0.9709505944546686 1.0 +err_prefin 3953.66943359375 +err_fin 1568.969482421875 +sparsity check 0.4920240491628647 +time 68.04 +20 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 537636.25 +err_fin 392180.03125 +sparsity check 0.49999337111200604 +time 138.10 +20 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 518775.59375 +err_fin 376679.75 +sparsity check 0.49999337111200604 +time 138.40 +20 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 25165.505859375 +err_fin 23440.7421875 +sparsity check 0.49999999574252535 +time 136.03 +21 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 85106.8828125 +err_fin 63341.171875 +sparsity check 0.49998772144317627 +time 75.11 +21 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 32309.31640625 +err_fin 26476.8984375 +sparsity check 0.49999988079071045 +time 1.33 +21 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 22025.603515625 +err_fin 20266.328125 +sparsity check 0.49999988079071045 +time 1.31 +21 self_attn.o_proj +Pruning ... +0.48070839047431946 0.08070839941501617 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3756209634598644 0.9709505944546686 1.0 +err_prefin 4337.72265625 +err_fin 1998.4146728515625 +sparsity check 0.48070839047431946 +time 68.04 +21 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 561466.125 +err_fin 410761.0625 +sparsity check 0.49999337111200604 +time 138.13 +21 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 545155.25 +err_fin 396950.25 +sparsity check 0.49999999574252535 +time 138.40 +21 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 26194.49609375 +err_fin 24549.0703125 +sparsity check 0.49999999574252535 +time 136.06 +22 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 82049.421875 +err_fin 61513.953125 +sparsity check 0.49998772144317627 +time 75.11 +22 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 35360.390625 +err_fin 30303.45703125 +sparsity check 0.49999988079071045 +time 1.33 +22 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 24802.689453125 +err_fin 23051.39453125 +sparsity check 0.49999988079071045 +time 1.32 +22 self_attn.o_proj +Pruning ... +0.49824874103069305 0.09824874997138977 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.434370141940453 0.9709505944546686 1.0 +err_prefin 3671.6357421875 +err_fin 1467.75341796875 +sparsity check 0.49824874103069305 +time 67.99 +22 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 588264.875 +err_fin 431812.5 +sparsity check 0.49999999574252535 +time 138.15 +22 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 572260.25 +err_fin 418173.1875 +sparsity check 0.49999337111200604 +time 138.41 +22 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 26897.529296875 +err_fin 25304.95703125 +sparsity check 0.49999999574252535 +time 136.02 +23 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 117155.9375 +err_fin 87148.28125 +sparsity check 0.49998772144317627 +time 75.12 +23 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 48022.50390625 +err_fin 40585.8671875 +sparsity check 0.49999988079071045 +time 1.32 +23 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26600.05078125 +err_fin 24621.19921875 +sparsity check 0.49999988079071045 +time 1.31 +23 self_attn.o_proj +Pruning ... +0.4934813380241394 0.09348134696483612 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4189351582836252 0.9709505944546686 1.0 +err_prefin 4764.31787109375 +err_fin 2131.6533203125 +sparsity check 0.4934813380241394 +time 67.99 +23 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 603782.4375 +err_fin 444955.90625 +sparsity check 0.49999337111200604 +time 138.13 +23 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 588263.3125 +err_fin 431614.96875 +sparsity check 0.49999337111200604 +time 138.29 +23 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 27810.431640625 +err_fin 26119.39453125 +sparsity check 0.49999999574252535 +time 136.07 +24 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 103867.359375 +err_fin 76765.421875 +sparsity check 0.49998772144317627 +time 75.11 +24 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 41862.609375 +err_fin 34830.8515625 +sparsity check 0.49999988079071045 +time 1.32 +24 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20143.111328125 +err_fin 18188.3203125 +sparsity check 0.49999988079071045 +time 1.31 +24 self_attn.o_proj +Pruning ... +0.49733252823352814 0.09733253717422485 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4314330393829158 0.9709505944546686 1.0 +err_prefin 4534.4150390625 +err_fin 2072.19873046875 +sparsity check 0.49733252823352814 +time 68.04 +24 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 584705.3125 +err_fin 432536.96875 +sparsity check 0.49999999574252535 +time 138.12 +24 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 574857.375 +err_fin 423772.9375 +sparsity check 0.49999999574252535 +time 138.39 +24 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 27941.52734375 +err_fin 26251.923828125 +sparsity check 0.49999999574252535 +time 136.05 +25 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 120071.890625 +err_fin 88952.328125 +sparsity check 0.49998772144317627 +time 75.11 +25 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47751.328125 +err_fin 39539.93359375 +sparsity check 0.49999988079071045 +time 1.31 +25 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20739.15234375 +err_fin 18690.85546875 +sparsity check 0.49999988079071045 +time 1.30 +25 self_attn.o_proj +Pruning ... +0.49525563418865204 0.09525564312934875 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4247239201127866 0.9709505944546686 1.0 +err_prefin 10895.736328125 +err_fin 5156.1416015625 +sparsity check 0.49525563418865204 +time 67.99 +25 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 557226.375 +err_fin 400755.0625 +sparsity check 0.49999999574252535 +time 138.09 +25 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 553179.4375 +err_fin 396006.375 +sparsity check 0.49999999574252535 +time 138.31 +25 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 37830.1953125 +err_fin 34984.01953125 +sparsity check 0.49999999574252535 +time 136.07 +26 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 175385.265625 +err_fin 129243.6875 +sparsity check 0.49998772144317627 +time 75.11 +26 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 61568.4375 +err_fin 52269.25 +sparsity check 0.49999988079071045 +time 1.32 +26 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 31962.962890625 +err_fin 29029.134765625 +sparsity check 0.49999988079071045 +time 1.30 +26 self_attn.o_proj +Pruning ... +0.495570570230484 0.09557057917118073 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4257458876633167 0.9709505944546686 1.0 +err_prefin 12212.02734375 +err_fin 6031.96142578125 +sparsity check 0.495570570230484 +time 67.99 +26 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 632510.375 +err_fin 460037.625 +sparsity check 0.49999999574252535 +time 138.11 +26 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 624925.6875 +err_fin 452469.21875 +sparsity check 0.49999337111200604 +time 138.41 +26 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 37664.9296875 +err_fin 35469.4453125 +sparsity check 0.49999999574252535 +time 136.06 +27 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 79798.0078125 +err_fin 60004.65625 +sparsity check 0.49998772144317627 +time 75.12 +27 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 36520.203125 +err_fin 30774.515625 +sparsity check 0.49999988079071045 +time 1.31 +27 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26921.06640625 +err_fin 25074.23828125 +sparsity check 0.49999988079071045 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.4959203898906708 0.09592039883136749 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.426879112102447 0.9709505944546686 1.0 +err_prefin 5038.2265625 +err_fin 2082.519775390625 +sparsity check 0.4959203898906708 +time 68.01 +27 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 669772.1875 +err_fin 490635.5625 +sparsity check 0.49999337111200604 +time 138.11 +27 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 659654.625 +err_fin 480803.78125 +sparsity check 0.49999337111200604 +time 138.32 +27 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 39647.5390625 +err_fin 37466.8203125 +sparsity check 0.49999999574252535 +time 136.05 +28 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 145252.265625 +err_fin 109142.9140625 +sparsity check 0.49998772144317627 +time 75.11 +28 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 56241.96484375 +err_fin 48937.75 +sparsity check 0.49999988079071045 +time 1.33 +28 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 40909.66796875 +err_fin 38057.453125 +sparsity check 0.49999988079071045 +time 1.31 +28 self_attn.o_proj +Pruning ... +0.49546174705028534 0.09546175599098206 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4253929438441721 0.9709505944546686 1.0 +err_prefin 8529.5234375 +err_fin 4099.28125 +sparsity check 0.49546174705028534 +time 68.01 +28 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 706189.625 +err_fin 521497.25 +sparsity check 0.49999337111200604 +time 138.13 +28 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 695184.9375 +err_fin 510882.78125 +sparsity check 0.49999337111200604 +time 138.42 +28 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 41796.6484375 +err_fin 39637.125 +sparsity check 0.49999999574252535 +time 136.02 +29 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 143634.5 +err_fin 108505.09375 +sparsity check 0.49998772144317627 +time 74.95 +29 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 52403.703125 +err_fin 45089.0 +sparsity check 0.49999988079071045 +time 1.32 +29 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43642.34375 +err_fin 40687.1796875 +sparsity check 0.49999988079071045 +time 1.31 +29 self_attn.o_proj +Pruning ... +0.49339035153388977 0.09339036047458649 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4186368717223352 0.9709505944546686 1.0 +err_prefin 8886.2861328125 +err_fin 4260.4326171875 +sparsity check 0.49339035153388977 +time 67.86 +29 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 747150.25 +err_fin 554973.375 +sparsity check 0.49999337111200604 +time 137.87 +29 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 733530.8125 +err_fin 542177.375 +sparsity check 0.49999337111200604 +time 138.15 +29 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 43809.9921875 +err_fin 41724.9765625 +sparsity check 0.49999999574252535 +time 135.81 +30 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 76537.53125 +err_fin 58076.2109375 +sparsity check 0.49998772144317627 +time 74.81 +30 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 28479.72265625 +err_fin 24591.79296875 +sparsity check 0.49999988079071045 +time 1.32 +30 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 39850.8671875 +err_fin 37386.2578125 +sparsity check 0.49999988079071045 +time 1.31 +30 self_attn.o_proj +Pruning ... +0.498422771692276 0.09842278063297272 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4349264835682236 0.9709505944546686 1.0 +err_prefin 4446.6142578125 +err_fin 2080.4375 +sparsity check 0.498422771692276 +time 67.71 +30 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 791530.875 +err_fin 591407.3125 +sparsity check 0.49999337111200604 +time 137.76 +30 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 774201.875 +err_fin 575441.0625 +sparsity check 0.49999337111200604 +time 138.09 +30 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 45369.296875 +err_fin 43251.7734375 +sparsity check 0.49999999574252535 +time 135.73 +31 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 151977.609375 +err_fin 115422.6953125 +sparsity check 0.49998772144317627 +time 74.80 +31 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 53647.578125 +err_fin 46616.53125 +sparsity check 0.49999988079071045 +time 1.31 +31 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43322.09375 +err_fin 39935.734375 +sparsity check 0.49999988079071045 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.4965995103120804 0.0965995192527771 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4290732843687386 0.9709505944546686 1.0 +err_prefin 8435.0986328125 +err_fin 4382.162109375 +sparsity check 0.4965995103120804 +time 67.67 +31 mlp.gate_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 824860.75 +err_fin 621916.0625 +sparsity check 0.49999337111200604 +time 137.68 +31 mlp.up_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 804884.875 +err_fin 604100.0625 +sparsity check 0.4999863122190748 +time 137.92 +31 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 46429.7890625 +err_fin 44336.328125 +sparsity check 0.49999999574252535 +time 135.72 +32 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 169728.5 +err_fin 129772.34375 +sparsity check 0.49998772144317627 +time 74.80 +32 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 56991.4921875 +err_fin 50299.40234375 +sparsity check 0.49999988079071045 +time 1.31 +32 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 39274.08203125 +err_fin 36705.93359375 +sparsity check 0.49999988079071045 +time 1.31 +32 self_attn.o_proj +Pruning ... +0.497482493519783 0.09748250246047974 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4319147233598155 0.9709505944546686 1.0 +err_prefin 6503.97265625 +err_fin 3032.211669921875 +sparsity check 0.497482493519783 +time 67.71 +32 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 844488.0 +err_fin 634982.75 +sparsity check 0.49999999574252535 +time 137.59 +32 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 825636.625 +err_fin 618152.875 +sparsity check 0.49999999574252535 +time 137.93 +32 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 49154.30859375 +err_fin 46833.76953125 +sparsity check 0.49999999574252535 +time 135.58 +33 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 241426.328125 +err_fin 182804.09375 +sparsity check 0.49998772144317627 +time 74.84 +33 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 71084.65625 +err_fin 61727.87109375 +sparsity check 0.49999988079071045 +time 1.31 +33 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43832.87890625 +err_fin 40348.328125 +sparsity check 0.49999988079071045 +time 1.30 +33 self_attn.o_proj +Pruning ... +0.4924851059913635 0.09248511493206024 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.415661446586966 0.9709505944546686 1.0 +err_prefin 12809.04296875 +err_fin 6111.228515625 +sparsity check 0.4924851059913635 +time 67.80 +33 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 855442.375 +err_fin 634631.125 +sparsity check 0.49999999574252535 +time 137.85 +33 mlp.up_proj +Pruning ... +0.49999337111200604 0.19997675716876984 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968095920098791 0.9709505944546686 1.0 +err_prefin 839932.0 +err_fin 620200.3125 +sparsity check 0.49999337111200604 +time 138.06 +33 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 55860.78125 +err_fin 52946.34375 +sparsity check 0.49999999574252535 +time 135.80 +34 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 87907.765625 +err_fin 66005.96875 +sparsity check 0.49998772144317627 +time 74.79 +34 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 38007.6015625 +err_fin 31144.2890625 +sparsity check 0.49999988079071045 +time 1.32 +34 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26247.046875 +err_fin 23801.80859375 +sparsity check 0.49999988079071045 +time 1.31 +34 self_attn.o_proj +Pruning ... +0.4965437799692154 0.09654378890991211 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4288935128557605 0.9709505944546686 1.0 +err_prefin 8917.4375 +err_fin 4313.02099609375 +sparsity check 0.4965437799692154 +time 67.69 +34 mlp.gate_proj +Pruning ... +0.49999999148505075 0.19999994337558746 0.4428571505205972 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968228405567136 0.9709505944546686 1.0 +err_prefin 883252.625 +err_fin 658956.625 +sparsity check 0.49999999148505075 +time 137.71 +34 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 867057.875 +err_fin 643740.75 +sparsity check 0.49999999574252535 +time 138.06 +34 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 60795.9921875 +err_fin 57895.12109375 +sparsity check 0.49999999574252535 +time 135.74 +35 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 142988.84375 +err_fin 108514.25 +sparsity check 0.49998772144317627 +time 74.81 +35 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47445.8515625 +err_fin 39947.671875 +sparsity check 0.49999988079071045 +time 1.31 +35 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 35960.7734375 +err_fin 33210.8359375 +sparsity check 0.49999988079071045 +time 1.31 +35 self_attn.o_proj +Pruning ... +0.4856218099594116 0.08562181890010834 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3926352497431633 0.9709505944546686 1.0 +err_prefin 9172.3349609375 +err_fin 4169.6044921875 +sparsity check 0.4856218099594116 +time 67.70 +35 mlp.gate_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 936726.75 +err_fin 700609.5 +sparsity check 0.4999863122190748 +time 137.63 +35 mlp.up_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 917721.875 +err_fin 683108.25 +sparsity check 0.4999863122190748 +time 137.89 +35 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 65276.53125 +err_fin 62261.46875 +sparsity check 0.49999999574252535 +time 135.69 +36 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 136184.03125 +err_fin 103272.765625 +sparsity check 0.49998772144317627 +time 74.79 +36 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 45948.1953125 +err_fin 39011.28125 +sparsity check 0.49999988079071045 +time 1.31 +36 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 34898.28515625 +err_fin 32478.26171875 +sparsity check 0.49999988079071045 +time 1.31 +36 self_attn.o_proj +Pruning ... +0.4887405186891556 0.0887405276298523 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4032024212222955 0.9709505944546686 1.0 +err_prefin 6714.736328125 +err_fin 2944.044677734375 +sparsity check 0.4887405186891556 +time 67.77 +36 mlp.gate_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 958992.0625 +err_fin 716187.625 +sparsity check 0.4999863122190748 +time 137.61 +36 mlp.up_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 940117.875 +err_fin 698780.0 +sparsity check 0.4999863122190748 +time 137.96 +36 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 72726.25 +err_fin 69204.3203125 +sparsity check 0.49999999574252535 +time 135.66 +37 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 214821.875 +err_fin 161912.28125 +sparsity check 0.49998772144317627 +time 74.81 +37 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 62873.0390625 +err_fin 53841.98828125 +sparsity check 0.49999988079071045 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 45130.265625 +err_fin 41807.7578125 +sparsity check 0.49999988079071045 +time 1.31 +37 self_attn.o_proj +Pruning ... +0.48692476749420166 0.08692477643489838 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3970715469171076 0.9709505944546686 1.0 +err_prefin 13892.369140625 +err_fin 5770.4619140625 +sparsity check 0.48692476749420166 +time 67.72 +37 mlp.gate_proj +Pruning ... +0.4999863122190748 0.19995205104351044 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1967954719582186 0.9709505944546686 1.0 +err_prefin 1001696.25 +err_fin 742198.4375 +sparsity check 0.4999863122190748 +time 137.57 +37 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 981569.75 +err_fin 723922.375 +sparsity check 0.4999929368495941 +time 137.86 +37 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 79529.15625 +err_fin 75579.359375 +sparsity check 0.49999999574252535 +time 135.58 +38 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 241476.09375 +err_fin 179820.125 +sparsity check 0.49998772144317627 +time 74.80 +38 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 70421.0859375 +err_fin 58469.70703125 +sparsity check 0.49999988079071045 +time 1.31 +38 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47591.578125 +err_fin 43447.296875 +sparsity check 0.49999988079071045 +time 1.30 +38 self_attn.o_proj +Pruning ... +0.4874471426010132 0.0874471515417099 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.398841434910925 0.9709505944546686 1.0 +err_prefin 22931.5625 +err_fin 9835.1083984375 +sparsity check 0.4874471426010132 +time 67.73 +38 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1032398.5 +err_fin 762469.0 +sparsity check 0.4999929368495941 +time 137.69 +38 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1014006.0 +err_fin 745387.375 +sparsity check 0.4999929368495941 +time 138.04 +38 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 89015.859375 +err_fin 84436.0859375 +sparsity check 0.49999999574252535 +time 135.75 +39 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 296672.65625 +err_fin 219949.171875 +sparsity check 0.49998772144317627 +time 74.79 +39 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 87629.203125 +err_fin 76056.8828125 +sparsity check 0.49999988079071045 +time 1.31 +39 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 51629.421875 +err_fin 47132.328125 +sparsity check 0.49999988079071045 +time 1.30 +39 self_attn.o_proj +Pruning ... +0.4940083473920822 0.09400835633277893 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.420660116755753 0.9709505944546686 1.0 +err_prefin 29438.013671875 +err_fin 11882.00390625 +sparsity check 0.4940083473920822 +time 67.72 +39 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1044343.8125 +err_fin 760460.25 +sparsity check 0.49999999574252535 +time 137.67 +39 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1028718.3125 +err_fin 745674.125 +sparsity check 0.49999999574252535 +time 137.92 +39 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 108696.53125 +err_fin 101968.9375 +sparsity check 0.49999999574252535 +time 135.71 +40 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 276137.53125 +err_fin 200761.25 +sparsity check 0.4999999850988388 +time 74.80 +40 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 76791.5546875 +err_fin 66313.9140625 +sparsity check 0.49999988079071045 +time 1.31 +40 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 58183.0546875 +err_fin 52631.859375 +sparsity check 0.49999988079071045 +time 1.30 +40 self_attn.o_proj +Pruning ... +0.49458740651607513 0.09458741545677185 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4225500160701596 0.9709505944546686 1.0 +err_prefin 43815.84375 +err_fin 21857.01171875 +sparsity check 0.49458740651607513 +time 67.72 +40 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1088925.75 +err_fin 777796.5 +sparsity check 0.4999929368495941 +time 137.70 +40 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1070080.0 +err_fin 760568.125 +sparsity check 0.4999929368495941 +time 138.03 +40 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 122476.9921875 +err_fin 115007.2890625 +sparsity check 0.49999999574252535 +time 135.74 +41 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 226480.96875 +err_fin 162543.71875 +sparsity check 0.4999999850988388 +time 74.80 +41 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 66330.421875 +err_fin 56078.65625 +sparsity check 0.49999988079071045 +time 1.32 +41 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 50970.06640625 +err_fin 46188.6875 +sparsity check 0.49999988079071045 +time 1.31 +41 self_attn.o_proj +Pruning ... +0.49377967417240143 0.09377968311309814 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4199122238458421 0.9709505944546686 1.0 +err_prefin 40533.98046875 +err_fin 18104.935546875 +sparsity check 0.49377967417240143 +time 67.72 +41 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1142463.125 +err_fin 795477.6875 +sparsity check 0.4999929368495941 +time 137.67 +41 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1101468.5 +err_fin 761714.1875 +sparsity check 0.4999929368495941 +time 137.89 +41 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 142451.421875 +err_fin 132629.96875 +sparsity check 0.49999999574252535 +time 135.63 +42 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 260025.5625 +err_fin 183340.40625 +sparsity check 0.4999999850988388 +time 74.85 +42 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 67961.0546875 +err_fin 57528.015625 +sparsity check 0.49999988079071045 +time 1.31 +42 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 62688.9921875 +err_fin 57235.4375 +sparsity check 0.49999988079071045 +time 1.31 +42 self_attn.o_proj +Pruning ... +0.49190540611743927 0.09190541505813599 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4137486574036342 0.9709505944546686 1.0 +err_prefin 35988.8125 +err_fin 16496.677734375 +sparsity check 0.49190540611743927 +time 67.76 +42 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1237528.0 +err_fin 851591.625 +sparsity check 0.4999929368495941 +time 137.82 +42 mlp.up_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1164203.5 +err_fin 795813.75 +sparsity check 0.4999929368495941 +time 138.15 +42 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 155792.96875 +err_fin 144716.15625 +sparsity check 0.49999999574252535 +time 135.82 +43 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 180837.96875 +err_fin 126696.921875 +sparsity check 0.4999999850988388 +time 74.82 +43 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 60236.5 +err_fin 50642.33984375 +sparsity check 0.49999988079071045 +time 1.33 +43 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 49476.8046875 +err_fin 44093.203125 +sparsity check 0.49999988079071045 +time 1.31 +43 self_attn.o_proj +Pruning ... +0.49158766865730286 0.09158767759799957 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4126977820904947 0.9709505944546686 1.0 +err_prefin 25418.21484375 +err_fin 12953.314453125 +sparsity check 0.49158766865730286 +time 67.71 +43 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1288788.25 +err_fin 880817.5625 +sparsity check 0.49999999574252535 +time 137.61 +43 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1200073.75 +err_fin 814611.3125 +sparsity check 0.49999999574252535 +time 137.85 +43 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 172545.75 +err_fin 159962.21875 +sparsity check 0.49999999574252535 +time 135.65 +44 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 319052.75 +err_fin 221645.265625 +sparsity check 0.4999999850988388 +time 74.80 +44 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 68662.0703125 +err_fin 59145.0703125 +sparsity check 0.49999988079071045 +time 1.33 +44 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 77577.5859375 +err_fin 71179.9375 +sparsity check 0.49999988079071045 +time 1.30 +44 self_attn.o_proj +Pruning ... +0.49883486330509186 0.09883487224578857 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.43624189692368 0.9709505944546686 1.0 +err_prefin 73088.859375 +err_fin 36183.0546875 +sparsity check 0.49883486330509186 +time 67.66 +44 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1415447.75 +err_fin 949839.5 +sparsity check 0.4999929368495941 +time 137.61 +44 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1270309.625 +err_fin 844733.5 +sparsity check 0.49999999574252535 +time 137.93 +44 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 201792.09375 +err_fin 184093.28125 +sparsity check 0.49999999574252535 +time 135.62 +45 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 206996.09375 +err_fin 141192.875 +sparsity check 0.49998772144317627 +time 74.79 +45 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 63186.203125 +err_fin 53493.8125 +sparsity check 0.49999988079071045 +time 1.31 +45 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 71699.359375 +err_fin 65873.9609375 +sparsity check 0.49999988079071045 +time 1.31 +45 self_attn.o_proj +Pruning ... +0.49905718863010406 0.09905719757080078 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.436950425504849 0.9709505944546686 1.0 +err_prefin 20377.58984375 +err_fin 10190.095703125 +sparsity check 0.49905718863010406 +time 67.66 +45 mlp.gate_proj +Pruning ... +0.4999929368495941 0.19997523725032806 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968087233910638 0.9709505944546686 1.0 +err_prefin 1533385.25 +err_fin 1029034.375 +sparsity check 0.4999929368495941 +time 137.57 +45 mlp.up_proj +Pruning ... +0.4999860099383763 0.19995099306106567 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196794867264677 0.9709505944546686 1.0 +err_prefin 1358533.25 +err_fin 903150.375 +sparsity check 0.4999860099383763 +time 137.89 +45 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 208862.671875 +err_fin 191016.1875 +sparsity check 0.49999999574252535 +time 135.58 +46 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 174311.3125 +err_fin 118973.0859375 +sparsity check 0.49998772144317627 +time 74.82 +46 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 52012.3125 +err_fin 44647.17578125 +sparsity check 0.4999997615814209 +time 1.32 +46 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 64555.6953125 +err_fin 59597.5 +sparsity check 0.49999988079071045 +time 1.30 +46 self_attn.o_proj +Pruning ... +0.4972884804010391 0.09728848934173584 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4312914889320105 0.9709505944546686 1.0 +err_prefin 26271.66796875 +err_fin 13423.681640625 +sparsity check 0.4972884804010391 +time 67.67 +46 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1643266.375 +err_fin 1109114.0 +sparsity check 0.49999999574252535 +time 137.63 +46 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1432952.0 +err_fin 958490.9375 +sparsity check 0.49999999574252535 +time 137.96 +46 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 210038.59375 +err_fin 193328.28125 +sparsity check 0.49999999574252535 +time 135.64 +47 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 235650.875 +err_fin 161934.359375 +sparsity check 0.4999999850988388 +time 74.82 +47 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 72157.484375 +err_fin 61676.18359375 +sparsity check 0.49999988079071045 +time 1.32 +47 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 60162.171875 +err_fin 54988.4921875 +sparsity check 0.49999988079071045 +time 1.31 +47 self_attn.o_proj +Pruning ... +0.4947979599237442 0.09479796886444092 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4232358059733905 0.9709505944546686 1.0 +err_prefin 43868.10546875 +err_fin 17617.6796875 +sparsity check 0.4947979599237442 +time 67.67 +47 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1707190.0 +err_fin 1138405.25 +sparsity check 0.49999999574252535 +time 137.71 +47 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1468303.5 +err_fin 969839.125 +sparsity check 0.49999999574252535 +time 137.92 +47 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 235664.03125 +err_fin 214540.015625 +sparsity check 0.49999999574252535 +time 135.71 +48 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 120378.390625 +err_fin 81721.375 +sparsity check 0.49998772144317627 +time 74.82 +48 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 35274.08984375 +err_fin 29568.283203125 +sparsity check 0.49999988079071045 +time 1.32 +48 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 61085.9140625 +err_fin 56080.40625 +sparsity check 0.49999988079071045 +time 1.30 +48 self_attn.o_proj +Pruning ... +0.49621304869651794 0.09621305763721466 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4278256023079603 0.9709505944546686 1.0 +err_prefin 26171.66796875 +err_fin 13117.326171875 +sparsity check 0.49621304869651794 +time 67.69 +48 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1784709.5 +err_fin 1185612.875 +sparsity check 0.49999999574252535 +time 137.64 +48 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1528418.0 +err_fin 1005144.5625 +sparsity check 0.49999999574252535 +time 137.92 +48 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 236499.40625 +err_fin 216323.59375 +sparsity check 0.49999999574252535 +time 135.62 +49 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 71287.03125 +err_fin 48274.9453125 +sparsity check 0.49998772144317627 +time 74.88 +49 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 28936.22265625 +err_fin 24354.15234375 +sparsity check 0.49999988079071045 +time 1.32 +49 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 48516.5390625 +err_fin 43833.39453125 +sparsity check 0.49999988079071045 +time 1.30 +49 self_attn.o_proj +Pruning ... +0.49976903200149536 0.09976904094219208 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.439213632094054 0.9709505944546686 1.0 +err_prefin 12020.09375 +err_fin 5332.6396484375 +sparsity check 0.49976903200149536 +time 67.77 +49 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1853601.5 +err_fin 1232022.875 +sparsity check 0.49999999574252535 +time 137.96 +49 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1584004.75 +err_fin 1043144.75 +sparsity check 0.49999999574252535 +time 138.14 +49 mlp.down_proj +Pruning ... +0.49999999148505075 0.19999994337558746 0.4428571505205972 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968228405567136 0.9709505944546686 1.0 +err_prefin 237605.40625 +err_fin 218308.71875 +sparsity check 0.49999999148505075 +time 135.89 +50 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 107512.8515625 +err_fin 72915.1328125 +sparsity check 0.49998772144317627 +time 74.90 +50 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 31522.11328125 +err_fin 26362.2578125 +sparsity check 0.49999988079071045 +time 1.31 +50 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 62462.328125 +err_fin 56655.20703125 +sparsity check 0.49999988079071045 +time 1.30 +50 self_attn.o_proj +Pruning ... +0.49843937158584595 0.09843938052654266 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4349795244104402 0.9709505944546686 1.0 +err_prefin 19946.50390625 +err_fin 9844.80859375 +sparsity check 0.49843937158584595 +time 67.79 +50 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1921409.5 +err_fin 1280349.875 +sparsity check 0.49999999574252535 +time 137.78 +50 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1636713.5 +err_fin 1081025.75 +sparsity check 0.49999999574252535 +time 138.09 +50 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 239087.375 +err_fin 220257.65625 +sparsity check 0.49999999574252535 +time 135.76 +51 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 137617.84375 +err_fin 93474.2734375 +sparsity check 0.49998772144317627 +time 74.82 +51 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43869.53125 +err_fin 37722.6015625 +sparsity check 0.49999988079071045 +time 1.31 +51 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 66521.75 +err_fin 60619.421875 +sparsity check 0.49999988079071045 +time 1.31 +51 self_attn.o_proj +Pruning ... +0.4904519319534302 0.09045194089412689 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.408927129814348 0.9709505944546686 1.0 +err_prefin 19066.23828125 +err_fin 8592.8701171875 +sparsity check 0.4904519319534302 +time 67.72 +51 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1972205.0 +err_fin 1320068.625 +sparsity check 0.49999999574252535 +time 137.41 +51 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1685180.375 +err_fin 1118161.75 +sparsity check 0.49999999574252535 +time 137.59 +51 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 240411.546875 +err_fin 221970.890625 +sparsity check 0.49999999574252535 +time 135.39 +52 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 223224.5625 +err_fin 153257.15625 +sparsity check 0.49998772144317627 +time 74.82 +52 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 50713.9921875 +err_fin 44092.09375 +sparsity check 0.49999988079071045 +time 1.32 +52 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 78441.4609375 +err_fin 71977.953125 +sparsity check 0.49999988079071045 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.4918024092912674 0.09180241823196411 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4134082002290973 0.9709505944546686 1.0 +err_prefin 35848.7890625 +err_fin 16662.212890625 +sparsity check 0.4918024092912674 +time 67.68 +52 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2020939.25 +err_fin 1360066.875 +sparsity check 0.49999999574252535 +time 137.40 +52 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1724693.5 +err_fin 1150505.875 +sparsity check 0.49999999574252535 +time 137.69 +52 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 245836.328125 +err_fin 227286.4375 +sparsity check 0.49999999574252535 +time 135.37 +53 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 75688.71875 +err_fin 52040.27734375 +sparsity check 0.49998772144317627 +time 74.83 +53 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 23458.7578125 +err_fin 19803.484375 +sparsity check 0.49999988079071045 +time 1.32 +53 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 61326.74609375 +err_fin 56115.328125 +sparsity check 0.49999988079071045 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.4999876320362091 0.09998764097690582 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399070044133822 0.9709505944546686 1.0 +err_prefin 16158.013671875 +err_fin 7425.544921875 +sparsity check 0.4999876320362091 +time 67.69 +53 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2072409.25 +err_fin 1394473.875 +sparsity check 0.49999999574252535 +time 137.40 +53 mlp.up_proj +Pruning ... +0.49999306883130756 0.19997569918632507 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968089873836865 0.9709505944546686 1.0 +err_prefin 1771752.25 +err_fin 1181658.5 +sparsity check 0.49999306883130756 +time 137.69 +53 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 248550.5 +err_fin 229996.0 +sparsity check 0.49999999574252535 +time 135.37 +54 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 89043.7109375 +err_fin 60977.234375 +sparsity check 0.49998772144317627 +time 74.83 +54 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 26400.94140625 +err_fin 22019.529296875 +sparsity check 0.49999988079071045 +time 1.31 +54 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 63252.6484375 +err_fin 58003.8515625 +sparsity check 0.49999988079071045 +time 1.30 +54 self_attn.o_proj +Pruning ... +0.4955211728811264 0.09552118182182312 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4255857028398577 0.9709505944546686 1.0 +err_prefin 17532.365234375 +err_fin 8175.330078125 +sparsity check 0.4955211728811264 +time 67.70 +54 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2120160.5 +err_fin 1431364.625 +sparsity check 0.49999999574252535 +time 137.45 +54 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1819114.125 +err_fin 1217025.5 +sparsity check 0.49999999574252535 +time 137.80 +54 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 248944.78125 +err_fin 230963.3125 +sparsity check 0.49999999574252535 +time 135.53 +55 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 141524.65625 +err_fin 97785.8359375 +sparsity check 0.49998772144317627 +time 74.85 +55 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 43051.9921875 +err_fin 36888.5859375 +sparsity check 0.49999988079071045 +time 1.31 +55 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 81794.875 +err_fin 74996.4375 +sparsity check 0.49999988079071045 +time 1.32 +55 self_attn.o_proj +Pruning ... +0.49940069019794464 0.09940069913864136 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4380435602332058 0.9709505944546686 1.0 +err_prefin 31721.14453125 +err_fin 14184.46875 +sparsity check 0.49940069019794464 +time 67.74 +55 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2157582.5 +err_fin 1460996.75 +sparsity check 0.49999999574252535 +time 137.82 +55 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1866490.75 +err_fin 1252972.0 +sparsity check 0.49999999574252535 +time 138.03 +55 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 254706.140625 +err_fin 237028.265625 +sparsity check 0.49999999574252535 +time 135.82 +56 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 103589.296875 +err_fin 71867.15625 +sparsity check 0.49998772144317627 +time 74.84 +56 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 34095.0390625 +err_fin 28903.630859375 +sparsity check 0.49999988079071045 +time 1.32 +56 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 57818.0625 +err_fin 53014.0859375 +sparsity check 0.49999988079071045 +time 1.31 +56 self_attn.o_proj +Pruning ... +0.49720531702041626 0.09720532596111298 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4310241513281567 0.9709505944546686 1.0 +err_prefin 17526.7421875 +err_fin 7605.9521484375 +sparsity check 0.49720531702041626 +time 67.73 +56 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2191015.75 +err_fin 1483332.875 +sparsity check 0.49999999574252535 +time 137.77 +56 mlp.up_proj +Pruning ... +0.4999930475439344 0.1999756246805191 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968089448042682 0.9709505944546686 1.0 +err_prefin 1899413.0 +err_fin 1275646.75 +sparsity check 0.4999930475439344 +time 138.10 +56 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 258692.40625 +err_fin 240815.328125 +sparsity check 0.49999999574252535 +time 135.75 +57 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 99702.078125 +err_fin 68511.875 +sparsity check 0.49998772144317627 +time 74.80 +57 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 32397.16015625 +err_fin 27770.46484375 +sparsity check 0.49999988079071045 +time 1.31 +57 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 74720.7109375 +err_fin 68767.1875 +sparsity check 0.49999988079071045 +time 1.31 +57 self_attn.o_proj +Pruning ... +0.49997578561306 0.09997579455375671 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398694486684214 0.9709505944546686 1.0 +err_prefin 19992.44921875 +err_fin 9417.009765625 +sparsity check 0.49997578561306 +time 67.67 +57 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2257321.0 +err_fin 1533310.25 +sparsity check 0.49999999574252535 +time 137.69 +57 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1962325.25 +err_fin 1322062.25 +sparsity check 0.49999999574252535 +time 137.91 +57 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 263333.6875 +err_fin 245607.65625 +sparsity check 0.49999999574252535 +time 135.67 +58 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 57746.41796875 +err_fin 39897.58203125 +sparsity check 0.4999881684780121 +time 74.85 +58 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20086.544921875 +err_fin 16584.66796875 +sparsity check 0.49999988079071045 +time 1.30 +58 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 53898.4453125 +err_fin 48980.71484375 +sparsity check 0.49999988079071045 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.49996423721313477 0.09996424615383148 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398328355561842 0.9709505944546686 1.0 +err_prefin 14397.3349609375 +err_fin 6855.41748046875 +sparsity check 0.49996423721313477 +time 67.77 +58 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2304389.5 +err_fin 1570758.625 +sparsity check 0.49999999574252535 +time 137.87 +58 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2006441.25 +err_fin 1357478.75 +sparsity check 0.49999999574252535 +time 138.14 +58 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 265134.375 +err_fin 247697.96875 +sparsity check 0.49999999574252535 +time 135.77 +59 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 72747.109375 +err_fin 50376.4765625 +sparsity check 0.49998772144317627 +time 74.85 +59 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 23255.9375 +err_fin 19589.732421875 +sparsity check 0.49999988079071045 +time 1.32 +59 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 60189.921875 +err_fin 54635.609375 +sparsity check 0.49999988079071045 +time 1.30 +59 self_attn.o_proj +Pruning ... +0.49997544288635254 0.09997545182704926 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398683621165247 0.9709505944546686 1.0 +err_prefin 14854.1142578125 +err_fin 6778.9365234375 +sparsity check 0.49997544288635254 +time 67.75 +59 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2355445.75 +err_fin 1611687.125 +sparsity check 0.49999999574252535 +time 137.81 +59 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2056139.5 +err_fin 1396654.75 +sparsity check 0.49999999574252535 +time 137.99 +59 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 265730.625 +err_fin 248909.6875 +sparsity check 0.49999999574252535 +time 135.77 +60 self_attn.q_proj +Pruning ... +0.499952495098114 0.09995250403881073 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4397956060958843 0.9709505944546686 1.0 +err_prefin 14072.958984375 +err_fin 9710.4326171875 +sparsity check 0.499952495098114 +time 74.84 +60 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 5356.01171875 +err_fin 4371.15625 +sparsity check 0.49999988079071045 +time 1.31 +60 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 32008.37109375 +err_fin 29080.765625 +sparsity check 0.49999988079071045 +time 1.30 +60 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 8923.4423828125 +err_fin 4361.81982421875 +sparsity check 0.4999999850988388 +time 67.71 +60 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2387430.5 +err_fin 1642731.5 +sparsity check 0.49999999574252535 +time 137.71 +60 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2096408.625 +err_fin 1431900.75 +sparsity check 0.49999999574252535 +time 138.01 +60 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 264923.25 +err_fin 248725.640625 +sparsity check 0.49999999574252535 +time 135.66 +61 self_attn.q_proj +Pruning ... +0.4999639242887497 0.09996393322944641 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.439831843429192 0.9709505944546686 1.0 +err_prefin 47633.66015625 +err_fin 33410.7421875 +sparsity check 0.4999639242887497 +time 74.87 +61 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 16931.99609375 +err_fin 14150.962890625 +sparsity check 0.49999988079071045 +time 1.31 +61 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 54502.34765625 +err_fin 50002.5234375 +sparsity check 0.49999988079071045 +time 1.31 +61 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 13544.08203125 +err_fin 6578.74658203125 +sparsity check 0.4999999850988388 +time 67.70 +61 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2429836.0 +err_fin 1676024.0 +sparsity check 0.49999999574252535 +time 137.79 +61 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2142882.75 +err_fin 1467843.25 +sparsity check 0.49999999574252535 +time 138.09 +61 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 266466.875 +err_fin 250583.453125 +sparsity check 0.49999999574252535 +time 135.77 +62 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 52108.1484375 +err_fin 36507.953125 +sparsity check 0.4999881684780121 +time 74.97 +62 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 18821.046875 +err_fin 15770.6708984375 +sparsity check 0.49999988079071045 +time 1.32 +62 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 59863.88671875 +err_fin 54585.32421875 +sparsity check 0.49999988079071045 +time 1.33 +62 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 12698.0546875 +err_fin 5501.97021484375 +sparsity check 0.4999999850988388 +time 67.85 +62 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2478572.0 +err_fin 1717879.0 +sparsity check 0.49999999574252535 +time 137.85 +62 mlp.up_proj +Pruning ... +0.4999930475439344 0.1999756246805191 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968089448042682 0.9709505944546686 1.0 +err_prefin 2197324.5 +err_fin 1512603.5 +sparsity check 0.4999930475439344 +time 138.11 +62 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 269533.28125 +err_fin 253766.6875 +sparsity check 0.49999999574252535 +time 135.78 +63 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 45090.234375 +err_fin 31936.611328125 +sparsity check 0.49998772144317627 +time 74.96 +63 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 14729.412109375 +err_fin 12177.5546875 +sparsity check 0.49999988079071045 +time 1.34 +63 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47941.43359375 +err_fin 43172.2734375 +sparsity check 0.49999988079071045 +time 1.31 +63 self_attn.o_proj +Pruning ... +0.49934376776218414 0.09934377670288086 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.437862545964863 0.9709505944546686 1.0 +err_prefin 8912.994140625 +err_fin 4147.9580078125 +sparsity check 0.49934376776218414 +time 67.83 +63 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2530823.0 +err_fin 1761127.75 +sparsity check 0.49999999574252535 +time 137.82 +63 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2248559.0 +err_fin 1554851.0 +sparsity check 0.49999999574252535 +time 138.01 +63 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 273414.4375 +err_fin 257696.5 +sparsity check 0.49999999574252535 +time 135.76 +64 self_attn.q_proj +Pruning ... +0.49997590482234955 0.09997591376304626 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398698265990744 0.9709505944546686 1.0 +err_prefin 83136.7265625 +err_fin 58826.8125 +sparsity check 0.49997590482234955 +time 75.01 +64 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 27826.56640625 +err_fin 23221.775390625 +sparsity check 0.49999988079071045 +time 1.32 +64 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 66823.0 +err_fin 61412.35546875 +sparsity check 0.49999988079071045 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 19559.7578125 +err_fin 8276.00390625 +sparsity check 0.4999999850988388 +time 67.84 +64 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2579009.0 +err_fin 1800084.625 +sparsity check 0.49999999574252535 +time 137.79 +64 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2299356.5 +err_fin 1594780.0 +sparsity check 0.49999999574252535 +time 138.08 +64 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 278264.25 +err_fin 262473.65625 +sparsity check 0.49999999574252535 +time 135.67 +65 self_attn.q_proj +Pruning ... +0.49996471405029297 0.09996473789215088 0.3999999761581421 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4398343858930924 0.9709505944546686 1.0 +err_prefin 24457.505859375 +err_fin 17313.802734375 +sparsity check 0.49996471405029297 +time 74.98 +65 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 8344.43359375 +err_fin 6731.2822265625 +sparsity check 0.49999988079071045 +time 1.33 +65 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 37324.55078125 +err_fin 33578.91796875 +sparsity check 0.49999988079071045 +time 1.33 +65 self_attn.o_proj +Pruning ... +0.49989132583141327 0.09989133477210999 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4396016275189618 0.9709505944546686 1.0 +err_prefin 8445.197265625 +err_fin 3808.46875 +sparsity check 0.49989132583141327 +time 67.83 +65 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2621310.5 +err_fin 1836813.125 +sparsity check 0.49999999574252535 +time 137.83 +65 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2349046.5 +err_fin 1636942.625 +sparsity check 0.49999999574252535 +time 138.02 +65 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 283536.125 +err_fin 267619.4375 +sparsity check 0.49999999574252535 +time 135.77 +66 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 43862.71484375 +err_fin 30926.05859375 +sparsity check 0.4999881684780121 +time 74.98 +66 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 15598.61328125 +err_fin 12776.958984375 +sparsity check 0.49999988079071045 +time 1.32 +66 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 48583.1796875 +err_fin 44552.58984375 +sparsity check 0.49999988079071045 +time 1.31 +66 self_attn.o_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 15405.40234375 +err_fin 7926.279296875 +sparsity check 0.4999999850988388 +time 67.85 +66 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2680995.25 +err_fin 1879446.625 +sparsity check 0.49999999574252535 +time 137.82 +66 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2419187.25 +err_fin 1686362.125 +sparsity check 0.49999999574252535 +time 138.10 +66 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 296520.46875 +err_fin 279841.875 +sparsity check 0.49999999574252535 +time 135.74 +67 self_attn.q_proj +Pruning ... +0.4999881684780121 0.0999881774187088 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399087049976413 0.9709505944546686 1.0 +err_prefin 19128.6796875 +err_fin 13560.47265625 +sparsity check 0.4999881684780121 +time 74.99 +67 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 6277.0283203125 +err_fin 4847.79541015625 +sparsity check 0.49999988079071045 +time 1.33 +67 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 20735.083984375 +err_fin 18118.58984375 +sparsity check 0.49999988079071045 +time 1.33 +67 self_attn.o_proj +Pruning ... +0.4955729842185974 0.09557299315929413 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4257537146560133 0.9709505944546686 1.0 +err_prefin 4755.5068359375 +err_fin 1735.486328125 +sparsity check 0.4955729842185974 +time 67.80 +67 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2708568.5 +err_fin 1903017.75 +sparsity check 0.49999999574252535 +time 137.85 +67 mlp.up_proj +Pruning ... +0.49999999148505075 0.19999994337558746 0.4428571505205972 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1968228405567136 0.9709505944546686 1.0 +err_prefin 2461934.5 +err_fin 1720931.75 +sparsity check 0.49999999148505075 +time 138.01 +67 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 300484.4375 +err_fin 283625.40625 +sparsity check 0.49999999574252535 +time 135.76 +68 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 151678.546875 +err_fin 107747.7578125 +sparsity check 0.49998772144317627 +time 75.01 +68 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 47341.45703125 +err_fin 40328.2421875 +sparsity check 0.49999988079071045 +time 1.33 +68 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 97499.9140625 +err_fin 90175.765625 +sparsity check 0.49999988079071045 +time 1.33 +68 self_attn.o_proj +Pruning ... +0.4997800439596176 0.09978005290031433 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4392485790125884 0.9709505944546686 1.0 +err_prefin 16619.52734375 +err_fin 8152.15625 +sparsity check 0.4997800439596176 +time 67.83 +68 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2801124.75 +err_fin 1972732.0 +sparsity check 0.49999999574252535 +time 137.91 +68 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2563574.25 +err_fin 1795951.25 +sparsity check 0.49999999574252535 +time 138.38 +68 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 321001.375 +err_fin 302484.65625 +sparsity check 0.49999999574252535 +time 135.86 +69 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 249855.0625 +err_fin 178340.03125 +sparsity check 0.49998772144317627 +time 75.04 +69 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 84946.28125 +err_fin 73483.71875 +sparsity check 0.49999988079071045 +time 1.32 +69 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 130384.75 +err_fin 119503.3671875 +sparsity check 0.49999988079071045 +time 1.33 +69 self_attn.o_proj +Pruning ... +0.4963749498128891 0.09637495875358582 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4283485972673708 0.9709505944546686 1.0 +err_prefin 29519.19140625 +err_fin 13056.2744140625 +sparsity check 0.4963749498128891 +time 67.92 +69 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2851497.0 +err_fin 2009025.75 +sparsity check 0.49999999574252535 +time 137.93 +69 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2639775.0 +err_fin 1850543.0 +sparsity check 0.49999999574252535 +time 138.11 +69 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 337184.28125 +err_fin 318016.5 +sparsity check 0.49999999574252535 +time 135.74 +70 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 145374.59375 +err_fin 103462.859375 +sparsity check 0.49998772144317627 +time 75.11 +70 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 46898.140625 +err_fin 39353.4453125 +sparsity check 0.49999988079071045 +time 1.34 +70 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 76992.0546875 +err_fin 70274.2109375 +sparsity check 0.49999988079071045 +time 1.33 +70 self_attn.o_proj +Pruning ... +0.49335020780563354 0.09335021674633026 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4185052212485088 0.9709505944546686 1.0 +err_prefin 23318.654296875 +err_fin 10277.1904296875 +sparsity check 0.49335020780563354 +time 67.97 +70 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2915899.0 +err_fin 2054024.875 +sparsity check 0.49999999574252535 +time 137.91 +70 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2717377.5 +err_fin 1904456.0 +sparsity check 0.49999999574252535 +time 138.21 +70 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 360152.78125 +err_fin 339412.3125 +sparsity check 0.49999999574252535 +time 135.73 +71 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 190468.0625 +err_fin 136425.984375 +sparsity check 0.49998772144317627 +time 74.99 +71 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 63572.9140625 +err_fin 53767.1953125 +sparsity check 0.49999988079071045 +time 1.32 +71 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 80682.921875 +err_fin 74023.1640625 +sparsity check 0.49999988079071045 +time 1.33 +71 self_attn.o_proj +Pruning ... +0.49673882126808167 0.09673883020877838 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4295224405461973 0.9709505944546686 1.0 +err_prefin 30127.17578125 +err_fin 13263.951171875 +sparsity check 0.49673882126808167 +time 67.89 +71 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3005487.5 +err_fin 2112914.75 +sparsity check 0.49999999574252535 +time 137.85 +71 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2807423.5 +err_fin 1965394.0 +sparsity check 0.49999999574252535 +time 138.01 +71 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 381285.5 +err_fin 358692.25 +sparsity check 0.49999999574252535 +time 135.80 +72 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 263776.59375 +err_fin 188009.8125 +sparsity check 0.49998772144317627 +time 74.98 +72 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 80561.09375 +err_fin 70249.53125 +sparsity check 0.49999988079071045 +time 1.34 +72 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 92642.984375 +err_fin 85140.859375 +sparsity check 0.49999988079071045 +time 1.32 +72 self_attn.o_proj +Pruning ... +0.4999038428068161 0.09990385174751282 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4396413259491503 0.9709505944546686 1.0 +err_prefin 37416.44921875 +err_fin 18725.60546875 +sparsity check 0.4999038428068161 +time 67.84 +72 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3078066.0 +err_fin 2158296.0 +sparsity check 0.49999999574252535 +time 137.80 +72 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2895632.75 +err_fin 2021234.0 +sparsity check 0.49999999574252535 +time 138.13 +72 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 409946.15625 +err_fin 385314.8125 +sparsity check 0.49999999574252535 +time 135.74 +73 self_attn.q_proj +Pruning ... +0.49998772144317627 0.09998773038387299 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399072878444124 0.9709505944546686 1.0 +err_prefin 246956.09375 +err_fin 175104.59375 +sparsity check 0.49998772144317627 +time 74.97 +73 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 77215.15625 +err_fin 66814.109375 +sparsity check 0.49999988079071045 +time 1.33 +73 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 107636.7109375 +err_fin 97223.234375 +sparsity check 0.49999988079071045 +time 1.33 +73 self_attn.o_proj +Pruning ... +0.4999135881662369 0.0999135971069336 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4396722322722684 0.9709505944546686 1.0 +err_prefin 25167.322265625 +err_fin 12153.6572265625 +sparsity check 0.4999135881662369 +time 67.84 +73 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3152760.75 +err_fin 2202667.5 +sparsity check 0.49999999574252535 +time 137.82 +73 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2980890.0 +err_fin 2074600.75 +sparsity check 0.49999999574252535 +time 138.02 +73 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 451407.625 +err_fin 421831.125 +sparsity check 0.49999999574252535 +time 135.76 +74 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 227363.40625 +err_fin 159704.0625 +sparsity check 0.4999999850988388 +time 75.01 +74 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 70052.21875 +err_fin 58712.01171875 +sparsity check 0.49999988079071045 +time 1.32 +74 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 81947.59375 +err_fin 73279.21875 +sparsity check 0.49999988079071045 +time 1.32 +74 self_attn.o_proj +Pruning ... +0.49862484633922577 0.09862485527992249 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4355718584986665 0.9709505944546686 1.0 +err_prefin 49821.27734375 +err_fin 20287.7265625 +sparsity check 0.49862484633922577 +time 67.85 +74 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3228713.25 +err_fin 2229053.25 +sparsity check 0.49999999574252535 +time 137.83 +74 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3063793.0 +err_fin 2106208.5 +sparsity check 0.49999999574252535 +time 138.11 +74 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 496875.25 +err_fin 461544.6875 +sparsity check 0.49999999574252535 +time 135.76 +75 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 229521.375 +err_fin 159013.125 +sparsity check 0.4999999850988388 +time 74.99 +75 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 67270.984375 +err_fin 55461.5546875 +sparsity check 0.49999988079071045 +time 1.34 +75 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 96647.6875 +err_fin 85233.734375 +sparsity check 0.49999988079071045 +time 1.32 +75 self_attn.o_proj +Pruning ... +0.49730534851551056 0.09730535745620728 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4313456994326053 0.9709505944546686 1.0 +err_prefin 45578.48828125 +err_fin 18587.2421875 +sparsity check 0.49730534851551056 +time 67.86 +75 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3260017.0 +err_fin 2230485.75 +sparsity check 0.49999999574252535 +time 137.85 +75 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3095145.25 +err_fin 2107935.5 +sparsity check 0.49999999574252535 +time 138.03 +75 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 559767.375 +err_fin 513296.1875 +sparsity check 0.49999999574252535 +time 135.77 +76 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 348435.8125 +err_fin 235325.90625 +sparsity check 0.4999999850988388 +time 74.99 +76 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 97760.9296875 +err_fin 80445.140625 +sparsity check 0.49999988079071045 +time 1.34 +76 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 143229.6875 +err_fin 125784.1484375 +sparsity check 0.49999988079071045 +time 1.31 +76 self_attn.o_proj +Pruning ... +0.489165797829628 0.08916580677032471 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4046298318294044 0.9709505944546686 1.0 +err_prefin 153192.640625 +err_fin 76294.2109375 +sparsity check 0.489165797829628 +time 67.90 +76 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3252784.0 +err_fin 2172913.0 +sparsity check 0.49999999574252535 +time 137.81 +76 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3067312.25 +err_fin 2038594.0 +sparsity check 0.49999999574252535 +time 138.11 +76 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 679483.9375 +err_fin 606641.75 +sparsity check 0.49999999574252535 +time 135.77 +77 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 232745.203125 +err_fin 151074.234375 +sparsity check 0.4999999850988388 +time 74.99 +77 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 71468.984375 +err_fin 56219.4765625 +sparsity check 0.49999988079071045 +time 1.33 +77 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 99336.578125 +err_fin 85069.78125 +sparsity check 0.49999988079071045 +time 1.31 +77 self_attn.o_proj +Pruning ... +0.4945366233587265 0.09453663229942322 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.422384499387892 0.9709505944546686 1.0 +err_prefin 61839.5625 +err_fin 21203.197265625 +sparsity check 0.4945366233587265 +time 67.84 +77 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 3049443.0 +err_fin 1970115.0 +sparsity check 0.49999999574252535 +time 137.82 +77 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2879499.75 +err_fin 1849714.0 +sparsity check 0.49999999574252535 +time 138.07 +77 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 790908.0 +err_fin 679657.75 +sparsity check 0.49999999574252535 +time 135.75 +78 self_attn.q_proj +Pruning ... +0.4999999850988388 0.09999999403953552 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399461639197515 0.9709505944546686 1.0 +err_prefin 220484.484375 +err_fin 131148.265625 +sparsity check 0.4999999850988388 +time 74.97 +78 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 73585.7265625 +err_fin 55216.25390625 +sparsity check 0.49999988079071045 +time 1.33 +78 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 87354.7421875 +err_fin 75689.359375 +sparsity check 0.49999988079071045 +time 1.31 +78 self_attn.o_proj +Pruning ... +0.4979788661003113 0.097978875041008 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4335064294429443 0.9709505944546686 1.0 +err_prefin 42572.82421875 +err_fin 16020.3203125 +sparsity check 0.4979788661003113 +time 67.85 +78 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2573022.5 +err_fin 1585658.75 +sparsity check 0.49999999574252535 +time 137.82 +78 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 2404934.5 +err_fin 1474085.5 +sparsity check 0.49999999574252535 +time 138.32 +78 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 857094.25 +err_fin 686390.6875 +sparsity check 0.49999999574252535 +time 135.81 +79 self_attn.q_proj +Pruning ... +0.49998800456523895 0.09998801350593567 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4399081853751627 0.9709505944546686 1.0 +err_prefin 143236.46875 +err_fin 77749.5390625 +sparsity check 0.49998800456523895 +time 74.99 +79 self_attn.k_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 54170.86328125 +err_fin 40600.9609375 +sparsity check 0.49999988079071045 +time 1.32 +79 self_attn.v_proj +Pruning ... +0.49999988079071045 0.19999980926513672 0.47499990463256836 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.088436829449873 0.9709505944546686 1.0 +err_prefin 39567.859375 +err_fin 31839.0 +sparsity check 0.49999988079071045 +time 1.32 +79 self_attn.o_proj +Pruning ... +0.49016299843788147 0.09016300737857819 0.3999999910593033 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4079642697638912 0.9709505944546686 1.0 +err_prefin 12191.912109375 +err_fin 2890.40625 +sparsity check 0.49016299843788147 +time 67.90 +79 mlp.gate_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1599801.125 +err_fin 894345.875 +sparsity check 0.49999999574252535 +time 137.84 +79 mlp.up_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 1467271.75 +err_fin 819270.125 +sparsity check 0.49999999574252535 +time 138.02 +79 mlp.down_proj +Pruning ... +0.49999999574252535 0.19999994337558746 0.4428571547780718 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.196822841966814 0.9709505944546686 1.0 +err_prefin 820401.125 +err_fin 514209.375 +sparsity check 0.49999999574252535 +time 135.80 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(4.6343e-06) +model.layers.0.self_attn.k_proj.weight tensor(0.0204) +model.layers.0.self_attn.v_proj.weight tensor(0.0536) +model.layers.0.self_attn.o_proj.weight tensor(3.9041e-06) +model.layers.0.mlp.gate_proj.weight tensor(2.7333e-06) +model.layers.0.mlp.up_proj.weight tensor(2.9206e-06) +model.layers.0.mlp.down_proj.weight tensor(0.0126) +50911.568996191025 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.673633 diff --git a/logs/llama2-70-0.5-no-final b/logs/llama2-70-0.5-no-final new file mode 100644 index 0000000..7f3557d --- /dev/null +++ b/logs/llama2-70-0.5-no-final @@ -0,0 +1,2897 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10.717611312866211 +time 74.34 +0 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21.521377563476562 +time 1.29 +0 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2.3604230880737305 +time 1.29 +0 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 0.3077867031097412 +time 66.99 +0 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232.090087890625 +time 132.95 +0 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232.13897705078125 +time 133.22 +0 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4.307583332061768 +time 132.33 +1 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 201.70103454589844 +time 74.12 +1 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 192.19281005859375 +time 1.31 +1 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17.129507064819336 +time 1.31 +1 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 9.993308067321777 +time 66.99 +1 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1592.91357421875 +time 132.49 +1 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1731.1697998046875 +time 132.92 +1 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 102.35027313232422 +time 132.15 +2 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 690.9444580078125 +time 74.06 +2 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 813.6088256835938 +time 1.31 +2 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 82.71668243408203 +time 1.31 +2 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 53.367958068847656 +time 67.04 +2 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 7192.265625 +time 132.52 +2 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 7540.5107421875 +time 132.90 +2 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 375.14044189453125 +time 132.14 +3 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5224.77783203125 +time 74.02 +3 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3596.468505859375 +time 1.30 +3 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1143.7545166015625 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 106.50784301757812 +time 66.93 +3 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 18111.9453125 +time 132.51 +3 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 18275.353515625 +time 132.93 +3 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 569.8212890625 +time 132.08 +4 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11148.994140625 +time 74.05 +4 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5542.4638671875 +time 1.30 +4 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2011.840576171875 +time 1.30 +4 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 131.37802124023438 +time 66.96 +4 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 32252.333984375 +time 132.55 +4 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 31786.78515625 +time 132.92 +4 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 882.1239013671875 +time 132.08 +5 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16082.5703125 +time 74.02 +5 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9010.7841796875 +time 1.30 +5 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 2952.642578125 +time 1.30 +5 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 213.86009216308594 +time 66.93 +5 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 42302.0390625 +time 132.50 +5 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 41377.6875 +time 132.87 +5 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1255.13427734375 +time 132.12 +6 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18580.162109375 +time 74.03 +6 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10272.94140625 +time 1.32 +6 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3491.278564453125 +time 1.30 +6 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 417.36669921875 +time 66.99 +6 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 57757.77734375 +time 132.54 +6 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 56122.015625 +time 132.88 +6 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1871.470458984375 +time 132.04 +7 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29744.19140625 +time 74.01 +7 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15506.8740234375 +time 1.31 +7 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5035.0078125 +time 1.30 +7 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 506.8324279785156 +time 66.97 +7 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 72811.9140625 +time 132.51 +7 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 70573.25 +time 132.87 +7 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 2655.89404296875 +time 132.12 +8 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 26077.62109375 +time 74.05 +8 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13514.57421875 +time 1.30 +8 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4806.9443359375 +time 1.30 +8 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 844.914306640625 +time 66.97 +8 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 102421.359375 +time 132.53 +8 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 97549.1953125 +time 132.84 +8 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4729.75341796875 +time 132.09 +9 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29380.02734375 +time 74.00 +9 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15578.587890625 +time 1.31 +9 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7502.291015625 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 459.5536193847656 +time 66.93 +9 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 133887.84375 +time 132.51 +9 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 126507.9453125 +time 132.89 +9 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 4669.8134765625 +time 132.08 +10 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17848.37890625 +time 74.01 +10 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7432.9658203125 +time 1.30 +10 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7279.8779296875 +time 1.30 +10 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 358.6090087890625 +time 66.98 +10 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164667.015625 +time 132.50 +10 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 154845.6875 +time 132.76 +10 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999701976776 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218706168299 0.9709505944546686 1.0 +err_prefin 5296.51416015625 +time 132.12 +11 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 28532.48046875 +time 74.02 +11 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15705.3203125 +time 1.31 +11 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8515.6044921875 +time 1.29 +11 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 568.9130859375 +time 66.97 +11 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 178439.65625 +time 132.51 +11 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 168405.421875 +time 132.92 +11 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 5870.9169921875 +time 132.11 +12 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 24391.189453125 +time 74.02 +12 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13878.55859375 +time 1.31 +12 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 6258.75048828125 +time 1.30 +12 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 705.231689453125 +time 66.98 +12 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 176686.15625 +time 132.53 +12 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 168170.15625 +time 132.90 +12 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 6499.34326171875 +time 132.08 +13 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 31191.0859375 +time 74.00 +13 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14700.6474609375 +time 1.31 +13 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8553.900390625 +time 1.30 +13 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1830.4453125 +time 66.89 +13 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 186636.21875 +time 132.50 +13 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 179507.25 +time 132.85 +13 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 8272.4453125 +time 132.07 +14 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50958.8984375 +time 74.04 +14 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25200.478515625 +time 1.31 +14 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11939.166015625 +time 1.30 +14 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2202.9189453125 +time 66.99 +14 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 217031.734375 +time 132.52 +14 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 208673.453125 +time 132.80 +14 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 8876.578125 +time 132.06 +15 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 37785.51171875 +time 74.02 +15 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17779.82421875 +time 1.32 +15 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14876.7451171875 +time 1.30 +15 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2147.2265625 +time 66.97 +15 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 232639.15625 +time 132.53 +15 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 224543.671875 +time 132.91 +15 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 10591.8955078125 +time 132.05 +16 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36921.046875 +time 74.00 +16 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18139.66796875 +time 1.31 +16 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13697.25390625 +time 1.29 +16 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1860.264892578125 +time 66.96 +16 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 245963.125 +time 132.72 +16 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 238381.765625 +time 133.25 +16 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 10677.962890625 +time 132.36 +17 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47274.65234375 +time 74.00 +17 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 24148.25 +time 1.31 +17 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11666.39453125 +time 1.30 +17 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2883.78466796875 +time 66.96 +17 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 242515.671875 +time 132.74 +17 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 235619.421875 +time 133.17 +17 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 13573.5234375 +time 132.43 +18 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 44734.6328125 +time 73.99 +18 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21054.4296875 +time 1.31 +18 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18631.873046875 +time 1.30 +18 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2936.5966796875 +time 66.98 +18 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 269289.59375 +time 132.74 +18 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 259858.96875 +time 133.15 +18 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 14795.94921875 +time 132.43 +19 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 53659.69140625 +time 74.04 +19 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25075.787109375 +time 1.31 +19 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 22434.982421875 +time 1.31 +19 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2300.91943359375 +time 66.99 +19 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 292668.4375 +time 132.78 +19 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 282869.3125 +time 133.17 +19 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999701976776 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218706168299 0.9709505944546686 1.0 +err_prefin 15182.5849609375 +time 132.37 +20 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 48110.94921875 +time 73.99 +20 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21109.404296875 +time 1.31 +20 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18582.00390625 +time 1.30 +20 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1811.337890625 +time 66.96 +20 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 316026.875 +time 132.75 +20 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 306260.875 +time 133.16 +20 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 15475.8134765625 +time 132.43 +21 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30222.734375 +time 74.00 +21 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 11362.373046875 +time 1.31 +21 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 15775.4921875 +time 1.30 +21 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2444.572265625 +time 66.93 +21 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 330811.75 +time 132.74 +21 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 322009.84375 +time 133.14 +21 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 16136.716796875 +time 132.39 +22 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29106.16796875 +time 73.99 +22 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13556.794921875 +time 1.31 +22 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18032.66015625 +time 1.30 +22 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1689.82666015625 +time 66.96 +22 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 348179.09375 +time 132.73 +22 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 339720.15625 +time 133.06 +22 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 16580.63671875 +time 132.43 +23 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41801.40625 +time 73.99 +23 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21408.697265625 +time 1.30 +23 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18993.08203125 +time 1.30 +23 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2295.337646484375 +time 66.98 +23 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 358201.5 +time 132.72 +23 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 350123.40625 +time 133.19 +23 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 17124.4765625 +time 132.50 +24 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36886.6328125 +time 74.02 +24 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18703.375 +time 1.31 +24 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14053.79296875 +time 1.30 +24 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2117.28173828125 +time 66.97 +24 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 346989.6875 +time 132.65 +24 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 341473.03125 +time 132.92 +24 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 17269.02734375 +time 132.23 +25 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 42170.6015625 +time 74.23 +25 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 21158.166015625 +time 1.31 +25 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14595.8388671875 +time 1.31 +25 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4943.6875 +time 67.18 +25 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 326379.40625 +time 133.18 +25 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 323482.0 +time 133.58 +25 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 23475.69921875 +time 132.68 +26 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 61465.19921875 +time 74.34 +26 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31137.2109375 +time 1.30 +26 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 22068.560546875 +time 1.30 +26 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5582.666015625 +time 67.28 +26 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 372075.3125 +time 133.29 +26 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 367292.0 +time 133.53 +26 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 23420.568359375 +time 132.82 +27 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 28320.3671875 +time 74.35 +27 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13523.166015625 +time 1.32 +27 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19238.0 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2411.788330078125 +time 67.33 +27 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 394936.0 +time 133.34 +27 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 389494.90625 +time 133.66 +27 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 24680.826171875 +time 132.75 +28 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50749.91796875 +time 74.33 +28 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 27150.32421875 +time 1.31 +28 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 28964.42578125 +time 1.31 +28 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3809.74755859375 +time 67.31 +28 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 416990.125 +time 133.28 +28 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 411412.4375 +time 133.67 +28 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 25919.30078125 +time 132.69 +29 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50315.03515625 +time 74.31 +29 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23235.5703125 +time 1.31 +29 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31637.900390625 +time 1.31 +29 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4623.9306640625 +time 67.29 +29 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 441865.75 +time 133.23 +29 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 434700.3125 +time 133.61 +29 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 27187.61328125 +time 132.87 +30 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 26189.08203125 +time 74.31 +30 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10043.103515625 +time 1.30 +30 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29211.3203125 +time 1.30 +30 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 2231.01708984375 +time 67.23 +30 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 468689.40625 +time 133.22 +30 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 459814.4375 +time 133.69 +30 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 28129.546875 +time 132.72 +31 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 52598.1171875 +time 74.30 +31 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23865.44921875 +time 1.32 +31 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30826.552734375 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4350.78466796875 +time 67.24 +31 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 489889.4375 +time 133.22 +31 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 479781.78125 +time 133.63 +31 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 28902.79296875 +time 132.81 +32 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 59903.83203125 +time 74.33 +32 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29770.015625 +time 1.30 +32 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 28210.146484375 +time 1.31 +32 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3051.3134765625 +time 67.29 +32 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 501656.375 +time 133.26 +32 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 492198.875 +time 133.63 +32 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 30499.484375 +time 132.80 +33 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 84420.5390625 +time 74.33 +33 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39708.3359375 +time 1.32 +33 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30832.232421875 +time 1.31 +33 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6309.8017578125 +time 67.28 +33 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 506694.9375 +time 133.23 +33 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 498621.0625 +time 133.61 +33 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 34879.8671875 +time 132.80 +34 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30183.8125 +time 74.33 +34 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 12323.650390625 +time 1.32 +34 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 18397.08203125 +time 1.30 +34 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4200.53369140625 +time 67.30 +34 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 522844.46875 +time 133.29 +34 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 514039.8125 +time 133.61 +34 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 37937.6640625 +time 132.73 +35 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 50034.2109375 +time 74.31 +35 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20875.783203125 +time 1.31 +35 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25280.05859375 +time 1.31 +35 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5350.72900390625 +time 67.26 +35 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 554782.75 +time 133.22 +35 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 545162.75 +time 133.58 +35 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 40659.3515625 +time 132.74 +36 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 48221.203125 +time 74.33 +36 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 20184.0546875 +time 1.31 +36 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 24801.09765625 +time 1.31 +36 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3293.978759765625 +time 67.29 +36 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 567511.875 +time 133.26 +36 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 557947.1875 +time 133.73 +36 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 45245.12109375 +time 132.42 +37 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 75363.3671875 +time 74.31 +37 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34463.5625 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 31608.99609375 +time 1.30 +37 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6403.7568359375 +time 67.27 +37 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 591796.4375 +time 133.23 +37 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 582373.5625 +time 133.56 +37 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 49221.75390625 +time 132.79 +38 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 84602.09375 +time 74.30 +38 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38396.80078125 +time 1.31 +38 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 32711.265625 +time 1.31 +38 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 10000.1728515625 +time 67.24 +38 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 608665.5 +time 133.21 +38 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 599951.25 +time 133.62 +38 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 55195.0625 +time 132.88 +39 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 103231.5703125 +time 74.33 +39 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 51982.09375 +time 1.31 +39 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35540.984375 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 12025.228515625 +time 67.24 +39 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 613712.875 +time 133.23 +39 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 605436.125 +time 133.58 +39 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 67558.8984375 +time 132.79 +40 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 96430.1875 +time 74.29 +40 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 45236.8125 +time 1.31 +40 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40018.015625 +time 1.31 +40 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17218.5234375 +time 67.25 +40 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 636673.875 +time 133.22 +40 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 628154.125 +time 133.55 +40 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 76410.84375 +time 132.73 +41 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 77917.921875 +time 74.32 +41 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35050.51953125 +time 1.33 +41 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35219.1171875 +time 1.32 +41 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18799.58984375 +time 67.24 +41 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 663693.875 +time 133.22 +41 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 643667.875 +time 133.58 +41 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 89795.3203125 +time 132.77 +42 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 89954.3125 +time 74.34 +42 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38151.453125 +time 1.31 +42 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43517.09375 +time 1.30 +42 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 17671.365234375 +time 67.29 +42 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 716675.5 +time 133.27 +42 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 679881.8125 +time 133.63 +42 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 97907.234375 +time 132.74 +43 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 62594.09375 +time 74.32 +43 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 30825.98828125 +time 1.32 +43 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33883.3828125 +time 1.31 +43 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 12448.8505859375 +time 67.25 +43 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 744831.6875 +time 133.21 +43 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 699455.75 +time 133.56 +43 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 108464.546875 +time 132.79 +44 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 113070.2578125 +time 74.27 +44 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40785.4375 +time 1.32 +44 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53236.5546875 +time 1.31 +44 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 29389.97265625 +time 67.17 +44 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 816565.25 +time 133.19 +44 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 742064.5625 +time 133.56 +44 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 127401.671875 +time 132.59 +45 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 70641.8984375 +time 74.20 +45 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 32233.82421875 +time 1.30 +45 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 51841.7265625 +time 1.30 +45 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8924.384765625 +time 67.08 +45 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 885152.25 +time 132.92 +45 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 794598.5 +time 133.24 +45 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 131311.15625 +time 132.44 +46 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 60269.14453125 +time 74.16 +46 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26353.34765625 +time 1.31 +46 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44948.98828125 +time 1.31 +46 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11645.3173828125 +time 67.17 +46 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 951803.1875 +time 132.93 +46 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 841496.25 +time 133.29 +46 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 131733.21875 +time 132.53 +47 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 81897.6953125 +time 74.15 +47 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37858.46875 +time 1.32 +47 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43521.9765625 +time 1.31 +47 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18492.826171875 +time 67.12 +47 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 987205.75 +time 132.90 +47 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 861385.25 +time 133.29 +47 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148283.625 +time 132.50 +48 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 41700.375 +time 74.16 +48 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14080.6494140625 +time 1.31 +48 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43042.390625 +time 1.30 +48 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 12309.330078125 +time 67.13 +48 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1033761.25 +time 132.91 +48 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 897758.25 +time 133.28 +48 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148257.171875 +time 132.48 +49 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23691.1640625 +time 74.20 +49 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 9239.4814453125 +time 1.32 +49 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 34483.1796875 +time 1.32 +49 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4694.615234375 +time 67.10 +49 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1075857.125 +time 132.94 +49 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 933454.375 +time 133.26 +49 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 148576.234375 +time 132.40 +50 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 37414.390625 +time 74.24 +50 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 12605.123046875 +time 1.32 +50 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44598.6484375 +time 1.32 +50 self_attn.o_proj +Pruning ... +0.4999999552965164 0.15999996662139893 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591281689242464 0.9709505944546686 1.0 +err_prefin 8907.14453125 +time 67.17 +50 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1118057.5 +time 132.93 +50 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 966272.375 +time 133.41 +50 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 149397.03125 +time 132.36 +51 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 47924.5 +time 74.20 +51 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19335.56640625 +time 1.32 +51 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 46904.40625 +time 1.31 +51 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8192.44140625 +time 67.14 +51 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1150105.0 +time 132.93 +51 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 996269.5 +time 133.29 +51 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 149764.0 +time 132.40 +52 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 81044.359375 +time 74.25 +52 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 25878.015625 +time 1.32 +52 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53787.68359375 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16396.55859375 +time 67.15 +52 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1182271.0 +time 132.93 +52 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1023028.875 +time 133.21 +52 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 153306.609375 +time 132.50 +53 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 25691.26171875 +time 74.21 +53 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 7557.69873046875 +time 1.32 +53 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44611.39453125 +time 1.32 +53 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6824.3671875 +time 67.11 +53 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1213898.0 +time 132.92 +53 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1052797.5 +time 133.28 +53 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 155050.875 +time 132.36 +54 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 30003.078125 +time 74.26 +54 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8660.60546875 +time 1.32 +54 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 46037.44140625 +time 1.31 +54 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7636.07763671875 +time 67.18 +54 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1244314.875 +time 132.94 +54 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1081664.75 +time 133.38 +54 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 155016.25 +time 132.36 +55 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 48759.3203125 +time 74.22 +55 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 17041.306640625 +time 1.32 +55 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 59746.5390625 +time 1.31 +55 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14929.080078125 +time 67.12 +55 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1268392.625 +time 132.95 +55 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1110917.0 +time 133.27 +55 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 158858.25 +time 132.37 +56 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 36526.109375 +time 74.24 +56 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 13544.583984375 +time 1.32 +56 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 42510.93359375 +time 1.32 +56 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8095.60400390625 +time 67.19 +56 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1289659.125 +time 132.90 +56 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1132746.5 +time 133.39 +56 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 161251.5 +time 132.35 +57 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 33041.51171875 +time 74.19 +57 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 10192.71875 +time 1.31 +57 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 54615.609375 +time 1.30 +57 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8536.6806640625 +time 67.14 +57 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1332029.5 +time 132.93 +57 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1171135.75 +time 133.24 +57 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164085.640625 +time 132.37 +58 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18912.63671875 +time 74.16 +58 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 5332.2275390625 +time 1.31 +58 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39178.5859375 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5707.21826171875 +time 67.13 +58 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1361064.0 +time 132.92 +58 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1199729.75 +time 133.21 +58 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 165043.921875 +time 132.43 +59 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23990.17578125 +time 74.21 +59 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 6799.7958984375 +time 1.31 +59 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43409.921875 +time 1.31 +59 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5401.51904296875 +time 67.13 +59 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1394133.25 +time 132.93 +59 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1230493.375 +time 133.23 +59 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 165187.59375 +time 132.34 +60 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4144.20751953125 +time 74.20 +60 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1084.0653076171875 +time 1.31 +60 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 23588.876953125 +time 1.31 +60 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3300.5615234375 +time 67.14 +60 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1415199.75 +time 132.91 +60 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1255729.75 +time 133.17 +60 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 164511.515625 +time 132.44 +61 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15355.552734375 +time 74.20 +61 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4016.667724609375 +time 1.31 +61 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 40092.5703125 +time 1.30 +61 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 5419.4111328125 +time 67.12 +61 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1442236.375 +time 132.93 +61 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1285856.25 +time 133.31 +61 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 165382.53125 +time 132.06 +62 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16637.373046875 +time 74.18 +62 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 4831.4716796875 +time 1.31 +62 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 43328.0859375 +time 1.31 +62 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 4547.39990234375 +time 67.11 +62 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1474178.75 +time 132.90 +62 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1319586.25 +time 133.13 +62 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 167112.3125 +time 132.40 +63 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 15147.34765625 +time 74.16 +63 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3676.107177734375 +time 1.30 +63 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33171.86328125 +time 1.30 +63 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3524.91455078125 +time 67.07 +63 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1508298.375 +time 132.94 +63 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1352186.375 +time 133.39 +63 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 169606.328125 +time 132.42 +64 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 27880.34375 +time 74.15 +64 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 8370.90625 +time 1.31 +64 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 49190.6015625 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7736.96044921875 +time 67.11 +64 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1538812.625 +time 132.95 +64 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1384683.375 +time 133.30 +64 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 172562.8125 +time 132.45 +65 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7831.9677734375 +time 74.14 +65 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1802.909912109375 +time 1.31 +65 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26999.32421875 +time 1.30 +65 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3169.1650390625 +time 67.08 +65 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1565351.0 +time 132.92 +65 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1415175.0 +time 133.24 +65 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 175581.546875 +time 132.38 +66 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 13420.857421875 +time 74.15 +66 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 3411.757568359375 +time 1.29 +66 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 35757.55859375 +time 1.29 +66 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6590.640625 +time 67.12 +66 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1602629.75 +time 132.93 +66 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1457893.25 +time 133.30 +66 mlp.down_proj +Pruning ... +0.4999999872275761 0.2499999701976776 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218706168299 0.9709505944546686 1.0 +err_prefin 183895.8125 +time 132.47 +67 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 6356.85546875 +time 74.18 +67 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 1641.073974609375 +time 1.32 +67 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 14184.7353515625 +time 1.30 +67 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 1797.1361083984375 +time 67.14 +67 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1619232.625 +time 132.93 +67 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1483684.5 +time 132.94 +67 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 185674.203125 +time 132.42 +68 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 51455.2265625 +time 74.16 +68 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19140.79296875 +time 1.30 +68 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 71009.984375 +time 1.31 +68 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 7044.2236328125 +time 67.10 +68 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1677991.75 +time 132.91 +68 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1545929.75 +time 133.36 +68 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 198817.625 +time 132.33 +69 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 86279.734375 +time 74.18 +69 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 44683.25 +time 1.31 +69 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 93197.8984375 +time 1.30 +69 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11332.5498046875 +time 67.12 +69 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1709753.375 +time 132.93 +69 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1592125.25 +time 133.28 +69 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 208887.15625 +time 132.43 +70 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 49584.6171875 +time 74.13 +70 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 19303.70703125 +time 1.31 +70 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 53245.4921875 +time 1.30 +70 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 8677.44140625 +time 67.11 +70 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1748143.75 +time 132.88 +70 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1638158.5 +time 133.25 +70 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 223469.34375 +time 132.44 +71 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 67144.390625 +time 74.13 +71 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 33651.921875 +time 1.31 +71 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 57274.30078125 +time 1.30 +71 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 11107.5009765625 +time 67.09 +71 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1803989.25 +time 132.92 +71 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1694213.25 +time 133.15 +71 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 236803.890625 +time 132.49 +72 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 93249.4453125 +time 74.11 +72 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 48147.8125 +time 1.30 +72 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 66679.4140625 +time 1.30 +72 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14145.203125 +time 67.07 +72 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1846660.75 +time 132.88 +72 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1744824.75 +time 133.25 +72 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 254743.875 +time 132.37 +73 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 86651.828125 +time 74.13 +73 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 47317.703125 +time 1.30 +73 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 76097.953125 +time 1.29 +73 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 9291.65625 +time 67.05 +73 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1890848.875 +time 132.91 +73 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1795729.0 +time 133.20 +73 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 280777.6875 +time 132.30 +74 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 79523.671875 +time 74.14 +74 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38743.5703125 +time 1.31 +74 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 57649.984375 +time 1.31 +74 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 18443.35546875 +time 67.11 +74 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1933389.5 +time 132.96 +74 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1842457.0 +time 133.33 +74 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 308752.71875 +time 132.40 +75 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 79962.3125 +time 74.14 +75 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 38553.9453125 +time 1.31 +75 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 64704.87109375 +time 1.30 +75 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 16449.65625 +time 67.11 +75 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1947772.75 +time 132.94 +75 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1854193.25 +time 133.29 +75 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 347022.1875 +time 132.32 +76 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 119896.9375 +time 74.09 +76 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 55633.9375 +time 1.31 +76 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 101904.0 +time 1.29 +76 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 59548.59765625 +time 67.11 +76 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1932840.0 +time 132.90 +76 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1824479.0 +time 133.25 +76 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 418164.375 +time 132.42 +77 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 78987.625 +time 74.10 +77 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 39827.34765625 +time 1.32 +77 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 68742.625 +time 1.30 +77 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 23459.712890625 +time 67.09 +77 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1798901.625 +time 132.93 +77 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1700200.5 +time 133.23 +77 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 480343.46875 +time 132.36 +78 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 70369.5625 +time 74.11 +78 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 37082.4140625 +time 1.32 +78 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 61058.3046875 +time 1.29 +78 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 14321.369140625 +time 67.13 +78 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1499885.5 +time 132.96 +78 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 1401595.5 +time 133.21 +78 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 511604.9375 +time 132.48 +79 self_attn.q_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 44149.421875 +time 74.07 +79 self_attn.k_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 29432.83984375 +time 1.30 +79 self_attn.v_proj +Pruning ... +0.4999997615814209 0.2499990463256836 0.46874988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0985899539830344 0.9709505944546686 1.0 +err_prefin 26794.783203125 +time 1.29 +79 self_attn.o_proj +Pruning ... +0.4999999701976776 0.15999998152256012 0.3399999886751175 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.5591282045725583 0.9709505944546686 1.0 +err_prefin 3699.406494140625 +time 67.04 +79 mlp.gate_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 915986.1875 +time 132.93 +79 mlp.up_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 836133.75 +time 133.28 +79 mlp.down_proj +Pruning ... +0.49999999148505075 0.2499999850988388 0.42857142431395395 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.2170218773647683 0.9709505944546686 1.0 +err_prefin 451949.9375 +time 132.50 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0083) +model.layers.0.self_attn.k_proj.weight tensor(0.0117) +model.layers.0.self_attn.v_proj.weight tensor(0.0441) +model.layers.0.self_attn.o_proj.weight tensor(3.6061e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0047) +49773.809720277786 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.448766 diff --git a/logs/llama2-70-0.6 b/logs/llama2-70-0.6 new file mode 100644 index 0000000..15e7928 --- /dev/null +++ b/logs/llama2-70-0.6 @@ -0,0 +1,4017 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17.612327575683594 +err_fin 7.10721492767334 +sparsity check 0.3999999612569809 +time 76.26 +0 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 31.51104736328125 +err_fin 10.96957015991211 +sparsity check 0.399999737739563 +time 1.34 +0 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 5.1145734786987305 +err_fin 2.7964329719543457 +sparsity check 0.399999737739563 +time 1.34 +0 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 0.7482728362083435 +err_fin 0.04435748606920242 +sparsity check 0.3999999612569809 +time 68.96 +0 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 472.1926574707031 +err_fin 168.95367431640625 +sparsity check 0.399999988930566 +time 138.68 +0 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 473.89642333984375 +err_fin 168.148193359375 +sparsity check 0.399999988930566 +time 139.05 +0 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 8.996854782104492 +err_fin 4.284056663513184 +sparsity check 0.399999988930566 +time 137.01 +1 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 324.388916015625 +err_fin 104.90257263183594 +sparsity check 0.3999999612569809 +time 75.98 +1 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 333.74029541015625 +err_fin 120.40553283691406 +sparsity check 0.399999737739563 +time 1.33 +1 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36.60847473144531 +err_fin 19.74991226196289 +sparsity check 0.399999737739563 +time 1.32 +1 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 25.67422866821289 +err_fin 4.771961688995361 +sparsity check 0.3999999612569809 +time 68.78 +1 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3332.1962890625 +err_fin 1083.202392578125 +sparsity check 0.399999988930566 +time 138.79 +1 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3651.33837890625 +err_fin 1152.73779296875 +sparsity check 0.399999988930566 +time 139.22 +1 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 204.66514587402344 +err_fin 161.40135192871094 +sparsity check 0.399999988930566 +time 136.72 +2 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1261.651123046875 +err_fin 597.3677978515625 +sparsity check 0.3999999612569809 +time 76.00 +2 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 1407.54248046875 +err_fin 703.7958984375 +sparsity check 0.399999737739563 +time 1.60 +2 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 172.35546875 +err_fin 115.82382202148438 +sparsity check 0.399999737739563 +time 1.33 +2 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 124.26980590820312 +err_fin 40.466644287109375 +sparsity check 0.3999999612569809 +time 68.89 +2 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 15045.37890625 +err_fin 6529.49267578125 +sparsity check 0.399999988930566 +time 138.69 +2 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 15851.59375 +err_fin 6701.36474609375 +sparsity check 0.399999988930566 +time 139.19 +2 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 745.590087890625 +err_fin 629.763671875 +sparsity check 0.399999988930566 +time 137.04 +3 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10852.21484375 +err_fin 6075.66943359375 +sparsity check 0.3999999612569809 +time 75.88 +3 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7197.013671875 +err_fin 4788.529296875 +sparsity check 0.399999737739563 +time 1.34 +3 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 2397.6689453125 +err_fin 1744.8912353515625 +sparsity check 0.399999737739563 +time 1.33 +3 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 242.5241241455078 +err_fin 114.71422576904297 +sparsity check 0.3999999612569809 +time 68.77 +3 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 37562.2109375 +err_fin 19615.8828125 +sparsity check 0.399999988930566 +time 138.70 +3 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 38100.703125 +err_fin 19501.314453125 +sparsity check 0.399999988930566 +time 138.94 +3 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1130.3961181640625 +err_fin 995.3624877929688 +sparsity check 0.399999988930566 +time 136.96 +4 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 22947.802734375 +err_fin 13750.658203125 +sparsity check 0.3999999612569809 +time 75.92 +4 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 10913.3994140625 +err_fin 7686.8955078125 +sparsity check 0.399999737739563 +time 1.60 +4 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 4031.020263671875 +err_fin 3118.88232421875 +sparsity check 0.399999737739563 +time 1.34 +4 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 293.3388671875 +err_fin 147.85208129882812 +sparsity check 0.3999999612569809 +time 68.83 +4 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 66397.1015625 +err_fin 38667.984375 +sparsity check 0.399999988930566 +time 139.03 +4 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 65667.421875 +err_fin 37706.22265625 +sparsity check 0.399999988930566 +time 139.43 +4 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1737.755615234375 +err_fin 1555.261962890625 +sparsity check 0.399999988930566 +time 137.11 +5 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 34087.51171875 +err_fin 21938.572265625 +sparsity check 0.3999999612569809 +time 75.87 +5 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 17835.97265625 +err_fin 13241.9794921875 +sparsity check 0.399999737739563 +time 1.34 +5 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 5898.74365234375 +err_fin 4773.126953125 +sparsity check 0.399999737739563 +time 1.34 +5 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 455.9298400878906 +err_fin 220.78146362304688 +sparsity check 0.3999999612569809 +time 68.79 +5 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 85977.9140625 +err_fin 53347.79296875 +sparsity check 0.399999988930566 +time 138.90 +5 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 84495.609375 +err_fin 51809.59375 +sparsity check 0.399999988930566 +time 139.19 +5 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2430.72412109375 +err_fin 2164.45361328125 +sparsity check 0.399999988930566 +time 136.95 +6 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 39206.7578125 +err_fin 26081.8671875 +sparsity check 0.3999999612569809 +time 75.93 +6 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 20082.53515625 +err_fin 15431.42578125 +sparsity check 0.399999737739563 +time 1.34 +6 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 6980.0126953125 +err_fin 5785.09619140625 +sparsity check 0.399999737739563 +time 1.34 +6 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 895.5304565429688 +err_fin 499.7837829589844 +sparsity check 0.3999999612569809 +time 68.96 +6 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 116574.0 +err_fin 74599.0078125 +sparsity check 0.399999988930566 +time 138.91 +6 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 113771.078125 +err_fin 72131.609375 +sparsity check 0.399999988930566 +time 139.30 +6 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3635.968505859375 +err_fin 3275.612060546875 +sparsity check 0.399999988930566 +time 136.93 +7 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 63573.28515625 +err_fin 44168.1796875 +sparsity check 0.3999999612569809 +time 75.86 +7 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30488.50390625 +err_fin 24611.5703125 +sparsity check 0.399999737739563 +time 1.35 +7 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9955.16796875 +err_fin 8486.0693359375 +sparsity check 0.399999737739563 +time 1.34 +7 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1042.595947265625 +err_fin 554.978515625 +sparsity check 0.3999999612569809 +time 68.77 +7 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 146512.0 +err_fin 96909.8125 +sparsity check 0.399999988930566 +time 138.99 +7 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 142699.984375 +err_fin 93581.234375 +sparsity check 0.399999988930566 +time 139.29 +7 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 5133.5068359375 +err_fin 4621.236328125 +sparsity check 0.399999988930566 +time 137.02 +8 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 55519.2421875 +err_fin 39471.8671875 +sparsity check 0.3999999612569809 +time 75.89 +8 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26569.623046875 +err_fin 21154.78125 +sparsity check 0.399999737739563 +time 1.34 +8 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9536.263671875 +err_fin 8105.48583984375 +sparsity check 0.399999737739563 +time 1.34 +8 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1802.17236328125 +err_fin 924.8701171875 +sparsity check 0.3999999612569809 +time 68.79 +8 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 205404.890625 +err_fin 140618.84375 +sparsity check 0.399999988930566 +time 138.85 +8 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 196496.6875 +err_fin 133449.53125 +sparsity check 0.399999988930566 +time 139.16 +8 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 8890.8037109375 +err_fin 8498.9755859375 +sparsity check 0.399999988930566 +time 136.99 +9 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 62084.6328125 +err_fin 46541.3984375 +sparsity check 0.3999999612569809 +time 75.88 +9 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30079.623046875 +err_fin 25589.6484375 +sparsity check 0.399999737739563 +time 1.35 +9 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14775.11328125 +err_fin 13083.171875 +sparsity check 0.399999737739563 +time 1.33 +9 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1073.184326171875 +err_fin 521.8529663085938 +sparsity check 0.3999999612569809 +time 68.79 +9 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 267515.09375 +err_fin 189444.0 +sparsity check 0.399999988930566 +time 138.95 +9 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 254250.46875 +err_fin 178660.0625 +sparsity check 0.399999988930566 +time 139.31 +9 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 9258.24609375 +err_fin 8459.10546875 +sparsity check 0.399999988930566 +time 136.99 +10 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36642.8828125 +err_fin 27778.05078125 +sparsity check 0.3999999612569809 +time 75.87 +10 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14996.3779296875 +err_fin 12421.6953125 +sparsity check 0.399999737739563 +time 1.51 +10 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14389.373046875 +err_fin 12692.5615234375 +sparsity check 0.399999737739563 +time 1.32 +10 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 734.5894775390625 +err_fin 382.73675537109375 +sparsity check 0.3999999612569809 +time 68.80 +10 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 329132.5625 +err_fin 242656.875 +sparsity check 0.399999988930566 +time 138.84 +10 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 311480.8125 +err_fin 228053.5625 +sparsity check 0.399999988930566 +time 139.16 +10 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 10280.35546875 +err_fin 9502.23828125 +sparsity check 0.399999988930566 +time 136.95 +11 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59513.4375 +err_fin 45776.640625 +sparsity check 0.3999999612569809 +time 75.88 +11 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30693.546875 +err_fin 25851.52734375 +sparsity check 0.399999737739563 +time 1.35 +11 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16623.55078125 +err_fin 14633.64453125 +sparsity check 0.399999737739563 +time 1.33 +11 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1155.1929931640625 +err_fin 583.9664306640625 +sparsity check 0.3999999612569809 +time 68.86 +11 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 355986.09375 +err_fin 266137.0 +sparsity check 0.399999988930566 +time 138.91 +11 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 338211.9375 +err_fin 251343.90625 +sparsity check 0.399999988930566 +time 139.31 +11 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 11358.93359375 +err_fin 10526.056640625 +sparsity check 0.399999988930566 +time 136.99 +12 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 51019.9609375 +err_fin 39177.4921875 +sparsity check 0.3999999612569809 +time 75.86 +12 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26892.7421875 +err_fin 22232.98046875 +sparsity check 0.399999737739563 +time 1.34 +12 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 12268.361328125 +err_fin 10614.7900390625 +sparsity check 0.399999737739563 +time 1.34 +12 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1459.8062744140625 +err_fin 724.5511474609375 +sparsity check 0.3999999612569809 +time 68.85 +12 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 352586.125 +err_fin 264577.9375 +sparsity check 0.399999988930566 +time 138.52 +12 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 337908.65625 +err_fin 252293.359375 +sparsity check 0.399999988930566 +time 139.24 +12 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 12551.25 +err_fin 11571.958984375 +sparsity check 0.399999988930566 +time 136.99 +13 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 64947.30859375 +err_fin 50110.44140625 +sparsity check 0.3999999612569809 +time 75.89 +13 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29089.6328125 +err_fin 24156.20703125 +sparsity check 0.399999737739563 +time 1.35 +13 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16743.55859375 +err_fin 14742.77734375 +sparsity check 0.399999737739563 +time 1.34 +13 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3752.18212890625 +err_fin 2130.10791015625 +sparsity check 0.3999999612569809 +time 68.74 +13 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 372413.25 +err_fin 276258.03125 +sparsity check 0.399999988930566 +time 138.88 +13 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 360113.34375 +err_fin 265735.53125 +sparsity check 0.399999988930566 +time 139.38 +13 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 15820.6875 +err_fin 14581.4482421875 +sparsity check 0.399999988930566 +time 137.08 +14 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 108299.484375 +err_fin 84146.359375 +sparsity check 0.3999999612569809 +time 75.87 +14 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49305.0703125 +err_fin 41669.30078125 +sparsity check 0.399999737739563 +time 1.60 +14 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23381.0 +err_fin 20784.14453125 +sparsity check 0.399999737739563 +time 1.34 +14 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4317.4189453125 +err_fin 2290.507568359375 +sparsity check 0.3999999612569809 +time 68.84 +14 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 433300.75 +err_fin 328536.46875 +sparsity check 0.399999988930566 +time 138.93 +14 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 419087.53125 +err_fin 316094.875 +sparsity check 0.399999988930566 +time 139.28 +14 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 17131.1015625 +err_fin 15888.0390625 +sparsity check 0.399999988930566 +time 136.99 +15 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 78850.65625 +err_fin 62189.6796875 +sparsity check 0.3999999612569809 +time 75.85 +15 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 34686.796875 +err_fin 29638.791015625 +sparsity check 0.399999737739563 +time 1.34 +15 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29270.41015625 +err_fin 26060.15234375 +sparsity check 0.399999737739563 +time 1.34 +15 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4507.93359375 +err_fin 2499.28173828125 +sparsity check 0.3999999612569809 +time 68.86 +15 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 464740.5625 +err_fin 353674.90625 +sparsity check 0.399999988930566 +time 138.97 +15 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 451478.0625 +err_fin 341646.78125 +sparsity check 0.399999988930566 +time 139.25 +15 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 20231.583984375 +err_fin 18674.9375 +sparsity check 0.399999988930566 +time 137.06 +16 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 77341.2265625 +err_fin 61628.3671875 +sparsity check 0.3999999612569809 +time 75.87 +16 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 35855.0703125 +err_fin 30790.6484375 +sparsity check 0.399999737739563 +time 1.35 +16 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26911.302734375 +err_fin 24213.021484375 +sparsity check 0.399999737739563 +time 1.34 +16 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3601.39013671875 +err_fin 2004.331787109375 +sparsity check 0.3999999612569809 +time 68.81 +16 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 491135.875 +err_fin 378446.3125 +sparsity check 0.399999988930566 +time 138.86 +16 mlp.up_proj +Pruning ... +0.39999998467309134 0.2499999701976776 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1452538745832397 0.9709505944546686 1.0 +err_prefin 478407.0 +err_fin 367127.875 +sparsity check 0.39999998467309134 +time 139.24 +16 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 20346.81640625 +err_fin 18934.5859375 +sparsity check 0.399999988930566 +time 137.03 +17 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 99662.6875 +err_fin 78476.28125 +sparsity check 0.3999999612569809 +time 75.85 +17 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 46874.953125 +err_fin 39818.0390625 +sparsity check 0.399999737739563 +time 1.34 +17 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 22943.46875 +err_fin 20219.392578125 +sparsity check 0.399999737739563 +time 1.33 +17 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6161.6611328125 +err_fin 2948.42724609375 +sparsity check 0.3999999612569809 +time 68.78 +17 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 482081.9375 +err_fin 365778.3125 +sparsity check 0.399999988930566 +time 138.61 +17 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 471164.75 +err_fin 355606.4375 +sparsity check 0.399999988930566 +time 139.27 +17 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 26453.935546875 +err_fin 23957.19140625 +sparsity check 0.399999988930566 +time 136.96 +18 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 93042.890625 +err_fin 73869.5 +sparsity check 0.3999999612569809 +time 75.86 +18 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41653.34375 +err_fin 35923.51953125 +sparsity check 0.399999737739563 +time 1.35 +18 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36575.17578125 +err_fin 33034.24609375 +sparsity check 0.399999737739563 +time 1.33 +18 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6069.55908203125 +err_fin 3101.435302734375 +sparsity check 0.3999999612569809 +time 69.00 +18 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 533570.6875 +err_fin 407169.625 +sparsity check 0.399999988930566 +time 138.95 +18 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 518153.1875 +err_fin 393194.15625 +sparsity check 0.399999988930566 +time 138.80 +18 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 28997.03125 +err_fin 26308.62890625 +sparsity check 0.399999988930566 +time 136.91 +19 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 112864.234375 +err_fin 90097.75 +sparsity check 0.3999999612569809 +time 75.87 +19 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49256.0 +err_fin 43416.234375 +sparsity check 0.399999737739563 +time 1.58 +19 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 43989.640625 +err_fin 40220.83203125 +sparsity check 0.399999737739563 +time 1.33 +19 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4828.154296875 +err_fin 2734.393310546875 +sparsity check 0.3999999612569809 +time 68.76 +19 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 582081.125 +err_fin 448252.40625 +sparsity check 0.399999988930566 +time 138.88 +19 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 565899.625 +err_fin 433865.1875 +sparsity check 0.399999988930566 +time 139.30 +19 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 29079.490234375 +err_fin 26786.86328125 +sparsity check 0.399999988930566 +time 137.13 +20 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100119.140625 +err_fin 80946.3359375 +sparsity check 0.3999999612569809 +time 75.88 +20 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42183.84375 +err_fin 37111.1953125 +sparsity check 0.399999737739563 +time 1.34 +20 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36190.59765625 +err_fin 33309.046875 +sparsity check 0.399999737739563 +time 1.34 +20 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3647.30126953125 +err_fin 1786.774169921875 +sparsity check 0.3999999612569809 +time 68.82 +20 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 627969.375 +err_fin 487027.125 +sparsity check 0.399999988930566 +time 138.88 +20 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 611342.3125 +err_fin 471954.09375 +sparsity check 0.399999988930566 +time 139.23 +20 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 29551.892578125 +err_fin 27422.1015625 +sparsity check 0.399999988930566 +time 136.94 +21 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 61035.453125 +err_fin 49399.328125 +sparsity check 0.3999999612569809 +time 75.91 +21 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23059.736328125 +err_fin 19896.216796875 +sparsity check 0.399999737739563 +time 1.36 +21 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30783.630859375 +err_fin 27992.6484375 +sparsity check 0.399999737739563 +time 1.35 +21 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5241.9404296875 +err_fin 2848.85009765625 +sparsity check 0.3999999612569809 +time 68.77 +21 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 655856.75 +err_fin 509202.71875 +sparsity check 0.399999988930566 +time 138.90 +21 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 641574.125 +err_fin 495852.375 +sparsity check 0.399999988930566 +time 139.24 +21 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 31134.962890625 +err_fin 29017.095703125 +sparsity check 0.399999988930566 +time 136.99 +22 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 58833.91015625 +err_fin 47792.2109375 +sparsity check 0.3999999612569809 +time 75.88 +22 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26931.833984375 +err_fin 23634.6015625 +sparsity check 0.399999737739563 +time 1.34 +22 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 35195.21875 +err_fin 32346.142578125 +sparsity check 0.399999737739563 +time 1.34 +22 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3497.6494140625 +err_fin 1750.6719970703125 +sparsity check 0.3999999612569809 +time 68.94 +22 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 689108.0625 +err_fin 535977.375 +sparsity check 0.399999988930566 +time 138.90 +22 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 675351.75 +err_fin 523008.375 +sparsity check 0.399999988930566 +time 138.93 +22 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 31923.92578125 +err_fin 29884.91796875 +sparsity check 0.399999988930566 +time 136.99 +23 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 86296.59375 +err_fin 69640.4375 +sparsity check 0.3999999612569809 +time 75.90 +23 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42529.94140625 +err_fin 37186.515625 +sparsity check 0.399999737739563 +time 1.35 +23 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36995.01171875 +err_fin 33991.26953125 +sparsity check 0.399999737739563 +time 1.32 +23 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4822.01953125 +err_fin 2533.981689453125 +sparsity check 0.3999999612569809 +time 68.82 +23 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 707328.75 +err_fin 551112.6875 +sparsity check 0.399999988930566 +time 138.60 +23 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 694757.25 +err_fin 538941.1875 +sparsity check 0.399999988930566 +time 138.92 +23 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 33092.3984375 +err_fin 30912.99609375 +sparsity check 0.399999988930566 +time 136.92 +24 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 76317.9140625 +err_fin 61327.4609375 +sparsity check 0.3999999612569809 +time 75.85 +24 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37193.29296875 +err_fin 32230.69921875 +sparsity check 0.399999737739563 +time 1.34 +24 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27487.359375 +err_fin 24762.923828125 +sparsity check 0.399999737739563 +time 1.34 +24 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4445.7109375 +err_fin 2407.4091796875 +sparsity check 0.3999999612569809 +time 68.76 +24 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 686851.3125 +err_fin 536282.875 +sparsity check 0.399999988930566 +time 138.85 +24 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 678956.625 +err_fin 528484.25 +sparsity check 0.399999988930566 +time 138.90 +24 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 33274.2890625 +err_fin 31157.46484375 +sparsity check 0.399999988930566 +time 137.00 +25 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 86498.2265625 +err_fin 69604.9609375 +sparsity check 0.3999999612569809 +time 76.22 +25 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41965.0625 +err_fin 35911.8828125 +sparsity check 0.399999737739563 +time 1.59 +25 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 28663.859375 +err_fin 25574.87109375 +sparsity check 0.399999737739563 +time 1.35 +25 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11528.775390625 +err_fin 6194.421875 +sparsity check 0.3999999612569809 +time 69.13 +25 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 645389.6875 +err_fin 491102.8125 +sparsity check 0.399999988930566 +time 139.51 +25 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 642249.5625 +err_fin 486831.65625 +sparsity check 0.399999988930566 +time 139.87 +25 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 45862.3359375 +err_fin 42062.7734375 +sparsity check 0.399999988930566 +time 137.54 +26 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 128960.375 +err_fin 102998.5 +sparsity check 0.3999999612569809 +time 76.19 +26 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61280.0390625 +err_fin 53391.234375 +sparsity check 0.399999737739563 +time 1.36 +26 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 43186.40625 +err_fin 38870.4375 +sparsity check 0.399999737739563 +time 1.35 +26 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11384.138671875 +err_fin 6428.9443359375 +sparsity check 0.3999999612569809 +time 69.09 +26 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 738151.125 +err_fin 568121.5 +sparsity check 0.399999988930566 +time 139.56 +26 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 731963.625 +err_fin 560884.875 +sparsity check 0.399999988930566 +time 139.79 +26 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 45562.1796875 +err_fin 42632.48046875 +sparsity check 0.399999988930566 +time 137.47 +27 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 57331.1796875 +err_fin 46671.7265625 +sparsity check 0.3999999612569809 +time 76.23 +27 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27610.8828125 +err_fin 24122.611328125 +sparsity check 0.399999737739563 +time 1.36 +27 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37264.8125 +err_fin 34419.55078125 +sparsity check 0.399999737739563 +time 1.35 +27 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4909.48095703125 +err_fin 2435.118896484375 +sparsity check 0.3999999612569809 +time 69.12 +27 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 784413.625 +err_fin 607624.0 +sparsity check 0.399999988930566 +time 139.61 +27 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 776550.625 +err_fin 598907.0625 +sparsity check 0.399999988930566 +time 139.78 +27 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 47765.3046875 +err_fin 44880.6015625 +sparsity check 0.399999988930566 +time 137.58 +28 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 105145.84375 +err_fin 85558.984375 +sparsity check 0.3999999612569809 +time 76.22 +28 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 53635.23828125 +err_fin 47616.234375 +sparsity check 0.399999737739563 +time 1.35 +28 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 56563.3359375 +err_fin 52054.6796875 +sparsity check 0.399999737739563 +time 1.34 +28 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 8209.810546875 +err_fin 4685.265625 +sparsity check 0.3999999612569809 +time 69.12 +28 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 828677.625 +err_fin 646741.625 +sparsity check 0.399999988930566 +time 139.55 +28 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 820765.8125 +err_fin 637406.125 +sparsity check 0.399999988930566 +time 139.51 +28 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 50522.453125 +err_fin 47658.9921875 +sparsity check 0.399999988930566 +time 137.72 +29 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 103383.8203125 +err_fin 84504.015625 +sparsity check 0.3999999612569809 +time 76.23 +29 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 47313.234375 +err_fin 41837.9296875 +sparsity check 0.399999737739563 +time 1.34 +29 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61706.8125 +err_fin 56952.3203125 +sparsity check 0.399999737739563 +time 1.35 +29 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9612.470703125 +err_fin 5473.2080078125 +sparsity check 0.3999999612569809 +time 69.11 +29 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 877926.1875 +err_fin 688177.9375 +sparsity check 0.399999988930566 +time 139.71 +29 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 868305.0625 +err_fin 677128.125 +sparsity check 0.399999988930566 +time 140.03 +29 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 53114.984375 +err_fin 50312.1875 +sparsity check 0.399999988930566 +time 137.67 +30 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 52442.41015625 +err_fin 43001.96484375 +sparsity check 0.3999999612569809 +time 76.20 +30 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 19952.232421875 +err_fin 17613.3671875 +sparsity check 0.399999737739563 +time 1.36 +30 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 56886.75 +err_fin 52822.890625 +sparsity check 0.399999737739563 +time 1.34 +30 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4728.763671875 +err_fin 2648.462890625 +sparsity check 0.3999999612569809 +time 69.11 +30 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 932318.375 +err_fin 734010.1875 +sparsity check 0.399999988930566 +time 139.44 +30 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 918637.375 +err_fin 719568.6875 +sparsity check 0.399999988930566 +time 139.80 +30 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 54958.6796875 +err_fin 52124.546875 +sparsity check 0.399999988930566 +time 137.53 +31 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 108229.6796875 +err_fin 88773.578125 +sparsity check 0.3999999612569809 +time 76.20 +31 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 48303.6796875 +err_fin 42885.78515625 +sparsity check 0.399999737739563 +time 1.55 +31 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 60604.79296875 +err_fin 55323.4296875 +sparsity check 0.399999737739563 +time 1.36 +31 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9769.052734375 +err_fin 5960.439453125 +sparsity check 0.3999999612569809 +time 69.11 +31 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 973989.125 +err_fin 772843.5625 +sparsity check 0.399999988930566 +time 139.68 +31 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 957890.5625 +err_fin 756552.75 +sparsity check 0.399999988930566 +time 139.93 +31 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 56087.5 +err_fin 53318.90625 +sparsity check 0.399999988930566 +time 137.71 +32 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 125380.25 +err_fin 103287.515625 +sparsity check 0.3999999612569809 +time 76.21 +32 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 58345.3125 +err_fin 52415.5078125 +sparsity check 0.399999737739563 +time 1.36 +32 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 54671.87890625 +err_fin 50620.2890625 +sparsity check 0.399999737739563 +time 1.34 +32 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6489.8603515625 +err_fin 3653.179931640625 +sparsity check 0.3999999612569809 +time 69.26 +32 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 997853.0 +err_fin 789553.6875 +sparsity check 0.399999988930566 +time 139.64 +32 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 983279.6875 +err_fin 774578.5625 +sparsity check 0.399999988930566 +time 139.48 +32 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 59351.0078125 +err_fin 56297.09375 +sparsity check 0.399999988930566 +time 137.38 +33 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 178617.0625 +err_fin 145935.265625 +sparsity check 0.3999999612569809 +time 76.17 +33 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77837.015625 +err_fin 69017.921875 +sparsity check 0.399999737739563 +time 1.54 +33 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 60248.34375 +err_fin 55033.7421875 +sparsity check 0.399999737739563 +time 1.35 +33 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12679.162109375 +err_fin 7037.369140625 +sparsity check 0.3999999612569809 +time 69.08 +33 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1008519.125 +err_fin 788506.9375 +sparsity check 0.399999988930566 +time 139.55 +33 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 996504.875 +err_fin 775590.75 +sparsity check 0.399999988930566 +time 139.99 +33 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 67620.734375 +err_fin 63801.515625 +sparsity check 0.399999988930566 +time 137.68 +34 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 60917.2109375 +err_fin 49612.73046875 +sparsity check 0.3999999612569809 +time 76.20 +34 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 25374.015625 +err_fin 21585.5703125 +sparsity check 0.399999737739563 +time 1.55 +34 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 35895.7421875 +err_fin 32282.359375 +sparsity check 0.399999737739563 +time 1.34 +34 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 8630.1640625 +err_fin 4882.9326171875 +sparsity check 0.3999999612569809 +time 69.16 +34 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1040259.625 +err_fin 817221.8125 +sparsity check 0.399999988930566 +time 139.66 +34 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1027032.3125 +err_fin 803372.0 +sparsity check 0.399999988930566 +time 139.64 +34 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 73886.59375 +err_fin 70010.28125 +sparsity check 0.399999988930566 +time 137.40 +35 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104002.0 +err_fin 85301.984375 +sparsity check 0.3999999612569809 +time 76.16 +35 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41951.890625 +err_fin 36891.9296875 +sparsity check 0.399999737739563 +time 1.35 +35 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49134.27734375 +err_fin 45119.69140625 +sparsity check 0.399999737739563 +time 1.34 +35 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10896.05078125 +err_fin 5893.8515625 +sparsity check 0.3999999612569809 +time 69.19 +35 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1104395.25 +err_fin 870039.1875 +sparsity check 0.399999988930566 +time 139.63 +35 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1090234.75 +err_fin 854829.125 +sparsity check 0.399999988930566 +time 139.66 +35 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 79348.03125 +err_fin 75311.671875 +sparsity check 0.399999988930566 +time 137.72 +36 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 99297.703125 +err_fin 81572.9140625 +sparsity check 0.3999999612569809 +time 76.18 +36 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41102.11328125 +err_fin 36326.3125 +sparsity check 0.399999737739563 +time 1.34 +36 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 47730.203125 +err_fin 44085.8984375 +sparsity check 0.399999737739563 +time 1.33 +36 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6881.4716796875 +err_fin 3650.48974609375 +sparsity check 0.3999999612569809 +time 69.09 +36 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1129398.75 +err_fin 888594.625 +sparsity check 0.399999988930566 +time 139.26 +36 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1116524.375 +err_fin 874378.0 +sparsity check 0.399999988930566 +time 139.82 +36 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 88429.6796875 +err_fin 83697.421875 +sparsity check 0.399999988930566 +time 137.34 +37 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 158881.21875 +err_fin 129712.421875 +sparsity check 0.3999999612569809 +time 76.18 +37 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69050.1796875 +err_fin 61238.65625 +sparsity check 0.399999737739563 +time 1.35 +37 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61172.625 +err_fin 56369.76953125 +sparsity check 0.399999737739563 +time 1.36 +37 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12933.6025390625 +err_fin 6396.13232421875 +sparsity check 0.3999999612569809 +time 69.12 +37 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1179273.25 +err_fin 921774.0 +sparsity check 0.399999988930566 +time 139.63 +37 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1166277.25 +err_fin 907425.0 +sparsity check 0.399999988930566 +time 139.56 +37 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 96837.4765625 +err_fin 91560.8828125 +sparsity check 0.399999988930566 +time 137.60 +38 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 178652.953125 +err_fin 144669.875 +sparsity check 0.3999999612569809 +time 76.19 +38 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 76139.265625 +err_fin 66612.6875 +sparsity check 0.399999737739563 +time 1.54 +38 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 63959.2890625 +err_fin 57992.68359375 +sparsity check 0.399999737739563 +time 1.35 +38 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 20120.30078125 +err_fin 10196.248046875 +sparsity check 0.3999999612569809 +time 69.11 +38 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1214896.0 +err_fin 946970.75 +sparsity check 0.399999988930566 +time 139.59 +38 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1203742.375 +err_fin 934144.5 +sparsity check 0.399999988930566 +time 140.02 +38 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 108883.90625 +err_fin 102736.2578125 +sparsity check 0.399999988930566 +time 137.56 +39 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 218867.359375 +err_fin 176544.625 +sparsity check 0.3999999612569809 +time 76.22 +39 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 101190.484375 +err_fin 89251.7421875 +sparsity check 0.399999737739563 +time 1.54 +39 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69442.984375 +err_fin 63092.515625 +sparsity check 0.399999737739563 +time 1.36 +39 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24446.88671875 +err_fin 11920.4208984375 +sparsity check 0.3999999612569809 +time 69.10 +39 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1224954.75 +err_fin 943958.375 +sparsity check 0.399999988930566 +time 139.67 +39 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1215661.75 +err_fin 932604.3125 +sparsity check 0.399999988930566 +time 140.07 +39 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 132942.28125 +err_fin 123923.03125 +sparsity check 0.399999988930566 +time 137.73 +40 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 204391.59375 +err_fin 162348.875 +sparsity check 0.3999999612569809 +time 76.07 +40 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 87739.265625 +err_fin 77051.0703125 +sparsity check 0.399999737739563 +time 1.37 +40 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 79062.90625 +err_fin 70670.21875 +sparsity check 0.399999737739563 +time 1.35 +40 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36004.8515625 +err_fin 20871.0546875 +sparsity check 0.3999999612569809 +time 69.02 +40 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1269506.0 +err_fin 963220.25 +sparsity check 0.399999988930566 +time 139.36 +40 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1259459.25 +err_fin 950523.0625 +sparsity check 0.399999988930566 +time 139.79 +40 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 150879.421875 +err_fin 140773.90625 +sparsity check 0.399999988930566 +time 137.17 +41 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 164681.625 +err_fin 129744.1875 +sparsity check 0.3999999612569809 +time 76.05 +41 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 68598.9609375 +err_fin 59495.703125 +sparsity check 0.399999737739563 +time 1.36 +41 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69201.7265625 +err_fin 62056.1484375 +sparsity check 0.399999737739563 +time 1.34 +41 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 39444.0546875 +err_fin 20136.62890625 +sparsity check 0.3999999612569809 +time 68.96 +41 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1322639.75 +err_fin 983274.25 +sparsity check 0.399999988930566 +time 139.18 +41 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1289896.75 +err_fin 952170.625 +sparsity check 0.399999988930566 +time 139.55 +41 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 177016.734375 +err_fin 163459.8125 +sparsity check 0.399999988930566 +time 137.58 +42 self_attn.q_proj +Pruning ... +0.3999999463558197 0.15999996662139893 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497204719225 0.9709505944546686 1.0 +err_prefin 190901.5 +err_fin 148293.4375 +sparsity check 0.3999999463558197 +time 76.04 +42 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 74717.453125 +err_fin 64965.5390625 +sparsity check 0.399999737739563 +time 1.34 +42 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 85384.0625 +err_fin 77046.2265625 +sparsity check 0.399999737739563 +time 1.34 +42 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 35428.703125 +err_fin 19269.55078125 +sparsity check 0.3999999612569809 +time 68.99 +42 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1427233.25 +err_fin 1051851.5 +sparsity check 0.399999988930566 +time 139.40 +42 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1362717.5 +err_fin 997086.8125 +sparsity check 0.399999988930566 +time 139.50 +42 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 192777.53125 +err_fin 177533.296875 +sparsity check 0.399999988930566 +time 137.38 +43 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 131945.078125 +err_fin 102151.328125 +sparsity check 0.3999999612569809 +time 76.02 +43 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 60967.8359375 +err_fin 52683.7265625 +sparsity check 0.399999737739563 +time 1.34 +43 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 66663.3828125 +err_fin 58915.53125 +sparsity check 0.399999737739563 +time 1.33 +43 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 26005.5078125 +err_fin 15426.9609375 +sparsity check 0.3999999612569809 +time 68.93 +43 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1484124.125 +err_fin 1087662.0 +sparsity check 0.399999988930566 +time 139.29 +43 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1403686.25 +err_fin 1020918.5625 +sparsity check 0.399999988930566 +time 139.63 +43 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 213179.8125 +err_fin 195941.1875 +sparsity check 0.399999988930566 +time 137.40 +44 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 239021.375 +err_fin 183875.03125 +sparsity check 0.3999999612569809 +time 76.07 +44 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 78612.90625 +err_fin 69156.46875 +sparsity check 0.399999737739563 +time 1.36 +44 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 103914.4921875 +err_fin 94715.125 +sparsity check 0.399999737739563 +time 1.34 +44 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 64961.6015625 +err_fin 38213.140625 +sparsity check 0.3999999612569809 +time 69.04 +44 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1625958.0 +err_fin 1176143.25 +sparsity check 0.399999988930566 +time 138.99 +44 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1487921.75 +err_fin 1065628.0 +sparsity check 0.399999988930566 +time 139.33 +44 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 248835.6875 +err_fin 224877.890625 +sparsity check 0.399999988930566 +time 137.38 +45 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 149267.625 +err_fin 113006.1171875 +sparsity check 0.3999999612569809 +time 76.08 +45 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 64247.765625 +err_fin 55482.5546875 +sparsity check 0.399999737739563 +time 1.56 +45 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 101248.5 +err_fin 91964.640625 +sparsity check 0.399999737739563 +time 1.35 +45 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19453.009765625 +err_fin 11711.2490234375 +sparsity check 0.3999999612569809 +time 68.94 +45 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1761874.75 +err_fin 1274789.375 +sparsity check 0.399999988930566 +time 139.35 +45 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1593797.0 +err_fin 1140697.625 +sparsity check 0.399999988930566 +time 139.61 +45 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 257242.65625 +err_fin 232960.640625 +sparsity check 0.399999988930566 +time 137.42 +46 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 126243.0859375 +err_fin 95670.484375 +sparsity check 0.3999999612569809 +time 76.07 +46 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 51350.12109375 +err_fin 45076.40625 +sparsity check 0.399999737739563 +time 1.35 +46 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 87394.296875 +err_fin 80037.828125 +sparsity check 0.399999737739563 +time 1.34 +46 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 25115.828125 +err_fin 15321.5966796875 +sparsity check 0.3999999612569809 +time 69.00 +46 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1894083.375 +err_fin 1376017.5 +sparsity check 0.399999988930566 +time 139.33 +46 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1689111.25 +err_fin 1214059.25 +sparsity check 0.399999988930566 +time 139.33 +46 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 258656.53125 +err_fin 235855.484375 +sparsity check 0.399999988930566 +time 136.96 +47 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 172369.34375 +err_fin 131557.046875 +sparsity check 0.3999999612569809 +time 76.03 +47 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 74734.03125 +err_fin 65136.32421875 +sparsity check 0.399999737739563 +time 1.35 +47 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 85362.28125 +err_fin 77233.171875 +sparsity check 0.399999737739563 +time 1.34 +47 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 40525.57421875 +err_fin 19895.736328125 +sparsity check 0.3999999612569809 +time 68.99 +47 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1963942.25 +err_fin 1412017.5 +sparsity check 0.399999988930566 +time 139.35 +47 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1727577.875 +err_fin 1228149.125 +sparsity check 0.399999988930566 +time 139.30 +47 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 290418.71875 +err_fin 261644.28125 +sparsity check 0.399999988930566 +time 137.45 +48 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 85049.4375 +err_fin 64390.56640625 +sparsity check 0.3999999612569809 +time 76.02 +48 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 28543.0703125 +err_fin 24567.41015625 +sparsity check 0.399999737739563 +time 1.34 +48 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 83328.1875 +err_fin 75792.25 +sparsity check 0.399999737739563 +time 1.33 +48 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 26175.41796875 +err_fin 15631.171875 +sparsity check 0.3999999612569809 +time 68.92 +48 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2057025.5 +err_fin 1472035.75 +sparsity check 0.399999988930566 +time 139.17 +48 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1801691.125 +err_fin 1274622.875 +sparsity check 0.399999988930566 +time 139.25 +48 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 290596.0 +err_fin 263112.4375 +sparsity check 0.399999988930566 +time 137.02 +49 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 47123.171875 +err_fin 35557.078125 +sparsity check 0.3999999612569809 +time 76.01 +49 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 19124.44921875 +err_fin 16366.7763671875 +sparsity check 0.399999737739563 +time 1.54 +49 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 67332.96875 +err_fin 60125.21875 +sparsity check 0.399999737739563 +time 1.34 +49 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10219.5615234375 +err_fin 5539.33056640625 +sparsity check 0.3999999612569809 +time 68.93 +49 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2140271.75 +err_fin 1531680.75 +sparsity check 0.399999988930566 +time 139.09 +49 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1872370.875 +err_fin 1324871.875 +sparsity check 0.399999988930566 +time 139.61 +49 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 291743.5625 +err_fin 265395.375 +sparsity check 0.399999988930566 +time 137.14 +50 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 76211.046875 +err_fin 57330.80859375 +sparsity check 0.3999999612569809 +time 76.01 +50 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 25855.921875 +err_fin 22361.712890625 +sparsity check 0.399999737739563 +time 1.36 +50 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86781.0078125 +err_fin 77938.0703125 +sparsity check 0.399999737739563 +time 1.34 +50 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19982.37890625 +err_fin 11792.8759765625 +sparsity check 0.3999999612569809 +time 68.97 +50 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2224319.5 +err_fin 1593539.125 +sparsity check 0.399999988930566 +time 139.31 +50 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1938637.0 +err_fin 1373849.125 +sparsity check 0.399999988930566 +time 139.27 +50 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 293277.4375 +err_fin 267518.5625 +sparsity check 0.399999988930566 +time 137.13 +51 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 99279.4375 +err_fin 74759.390625 +sparsity check 0.3999999612569809 +time 76.04 +51 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38646.03125 +err_fin 33657.5546875 +sparsity check 0.399999737739563 +time 1.54 +51 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 90655.71875 +err_fin 82529.46875 +sparsity check 0.399999737739563 +time 1.35 +51 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17904.578125 +err_fin 9912.552734375 +sparsity check 0.3999999612569809 +time 68.92 +51 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2288922.75 +err_fin 1644780.375 +sparsity check 0.399999988930566 +time 139.32 +51 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1998986.0 +err_fin 1421125.75 +sparsity check 0.399999988930566 +time 139.34 +51 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 294801.0 +err_fin 269504.6875 +sparsity check 0.399999988930566 +time 137.07 +52 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 167389.796875 +err_fin 126759.140625 +sparsity check 0.3999999612569809 +time 76.02 +52 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 50601.26953125 +err_fin 44585.43359375 +sparsity check 0.399999737739563 +time 1.34 +52 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 103333.921875 +err_fin 93834.59375 +sparsity check 0.399999737739563 +time 1.33 +52 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 34392.21875 +err_fin 19500.4609375 +sparsity check 0.3999999612569809 +time 68.97 +52 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2348911.25 +err_fin 1694721.0 +sparsity check 0.399999988930566 +time 139.21 +52 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2048677.125 +err_fin 1462018.125 +sparsity check 0.399999988930566 +time 139.61 +52 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 300744.8125 +err_fin 275488.0625 +sparsity check 0.399999988930566 +time 137.40 +53 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 50624.0234375 +err_fin 38541.99609375 +sparsity check 0.3999999612569809 +time 76.05 +53 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 15209.3046875 +err_fin 13159.8466796875 +sparsity check 0.399999737739563 +time 1.36 +53 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86490.6875 +err_fin 78445.734375 +sparsity check 0.399999737739563 +time 1.34 +53 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 15345.724609375 +err_fin 8511.248046875 +sparsity check 0.3999999612569809 +time 69.03 +53 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2411984.5 +err_fin 1738109.25 +sparsity check 0.399999988930566 +time 139.27 +53 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2108468.0 +err_fin 1502701.0 +sparsity check 0.399999988930566 +time 139.70 +53 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 303820.53125 +err_fin 278497.3125 +sparsity check 0.399999988930566 +time 136.95 +54 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59636.765625 +err_fin 45276.53515625 +sparsity check 0.3999999612569809 +time 76.04 +54 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 17630.81640625 +err_fin 15097.21484375 +sparsity check 0.399999737739563 +time 1.36 +54 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 89247.875 +err_fin 81014.6015625 +sparsity check 0.399999737739563 +time 1.36 +54 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17046.90234375 +err_fin 9478.064453125 +sparsity check 0.3999999612569809 +time 69.01 +54 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2473427.25 +err_fin 1786240.75 +sparsity check 0.399999988930566 +time 139.28 +54 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2167564.75 +err_fin 1548563.5 +sparsity check 0.399999988930566 +time 139.63 +54 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 303654.15625 +err_fin 279146.78125 +sparsity check 0.399999988930566 +time 137.03 +55 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 97923.671875 +err_fin 74614.84375 +sparsity check 0.3999999612569809 +time 76.01 +55 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 34288.24609375 +err_fin 29903.81640625 +sparsity check 0.399999737739563 +time 1.36 +55 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 116934.5625 +err_fin 105758.9375 +sparsity check 0.399999737739563 +time 1.34 +55 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 34275.53515625 +err_fin 18865.6484375 +sparsity check 0.3999999612569809 +time 68.94 +55 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2522007.25 +err_fin 1825207.75 +sparsity check 0.399999988930566 +time 138.95 +55 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2227084.75 +err_fin 1594715.5 +sparsity check 0.399999988930566 +time 139.14 +55 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 310645.46875 +err_fin 286460.5 +sparsity check 0.399999988930566 +time 137.28 +56 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 73575.875 +err_fin 56280.72265625 +sparsity check 0.3999999612569809 +time 76.01 +56 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27285.4453125 +err_fin 23696.5234375 +sparsity check 0.399999737739563 +time 1.35 +56 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 82452.4453125 +err_fin 74864.8359375 +sparsity check 0.399999737739563 +time 1.34 +56 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17726.39453125 +err_fin 9315.02734375 +sparsity check 0.3999999612569809 +time 68.94 +56 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2563127.5 +err_fin 1853663.125 +sparsity check 0.399999988930566 +time 139.19 +56 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2268121.75 +err_fin 1623319.0 +sparsity check 0.399999988930566 +time 139.50 +56 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 314845.40625 +err_fin 290457.625 +sparsity check 0.399999988930566 +time 137.34 +57 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 65624.921875 +err_fin 49837.5703125 +sparsity check 0.3999999612569809 +time 76.11 +57 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 20926.7578125 +err_fin 18283.404296875 +sparsity check 0.399999737739563 +time 1.36 +57 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 106189.296875 +err_fin 96529.4375 +sparsity check 0.399999737739563 +time 1.35 +57 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 18766.31640625 +err_fin 10523.56640625 +sparsity check 0.3999999612569809 +time 68.92 +57 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2649966.0 +err_fin 1919384.75 +sparsity check 0.399999988930566 +time 139.30 +57 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2348224.25 +err_fin 1684120.25 +sparsity check 0.399999988930566 +time 139.40 +57 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 320348.4375 +err_fin 296307.125 +sparsity check 0.399999988930566 +time 137.00 +58 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36040.35546875 +err_fin 27597.578125 +sparsity check 0.3999999612569809 +time 76.09 +58 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 10517.08203125 +err_fin 8903.44140625 +sparsity check 0.399999737739563 +time 1.36 +58 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75836.828125 +err_fin 68298.859375 +sparsity check 0.399999737739563 +time 1.34 +58 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12667.3779296875 +err_fin 7178.7724609375 +sparsity check 0.3999999612569809 +time 68.97 +58 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2710278.25 +err_fin 1967515.5 +sparsity check 0.399999988930566 +time 139.29 +58 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2404047.75 +err_fin 1729631.125 +sparsity check 0.399999988930566 +time 139.36 +58 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 322225.75 +err_fin 298635.03125 +sparsity check 0.399999988930566 +time 137.06 +59 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 46747.12109375 +err_fin 35650.5703125 +sparsity check 0.3999999612569809 +time 76.05 +59 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 13351.0546875 +err_fin 11528.861328125 +sparsity check 0.399999737739563 +time 1.36 +59 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 84195.9375 +err_fin 75407.7421875 +sparsity check 0.399999737739563 +time 1.35 +59 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12003.314453125 +err_fin 6581.07958984375 +sparsity check 0.3999999612569809 +time 69.00 +59 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2774652.0 +err_fin 2019904.25 +sparsity check 0.399999988930566 +time 139.35 +59 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2467993.75 +err_fin 1779848.75 +sparsity check 0.399999988930566 +time 139.19 +59 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 322592.5625 +err_fin 299847.0 +sparsity check 0.399999988930566 +time 137.37 +60 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7381.94287109375 +err_fin 5601.66162109375 +sparsity check 0.3999999612569809 +time 76.11 +60 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 1925.58837890625 +err_fin 1598.54248046875 +sparsity check 0.399999737739563 +time 1.36 +60 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 45933.5 +err_fin 41093.85546875 +sparsity check 0.399999737739563 +time 1.34 +60 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7170.42529296875 +err_fin 4106.572265625 +sparsity check 0.3999999612569809 +time 68.98 +60 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2818281.25 +err_fin 2060159.5 +sparsity check 0.399999988930566 +time 139.37 +60 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2518651.75 +err_fin 1824972.25 +sparsity check 0.399999988930566 +time 139.48 +60 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 321096.96875 +err_fin 299192.5625 +sparsity check 0.399999988930566 +time 137.28 +61 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 29229.53515625 +err_fin 22513.853515625 +sparsity check 0.3999999612569809 +time 76.08 +61 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7946.28515625 +err_fin 6801.779296875 +sparsity check 0.399999737739563 +time 1.36 +61 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77985.5859375 +err_fin 70779.703125 +sparsity check 0.399999737739563 +time 1.35 +61 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11768.6376953125 +err_fin 6725.62255859375 +sparsity check 0.3999999612569809 +time 69.17 +61 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2873632.25 +err_fin 2103654.5 +sparsity check 0.399999988930566 +time 139.37 +61 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2576728.25 +err_fin 1871461.75 +sparsity check 0.399999988930566 +time 139.56 +61 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 322479.8125 +err_fin 301094.6875 +sparsity check 0.399999988930566 +time 137.41 +62 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 31340.5 +err_fin 24200.8046875 +sparsity check 0.3999999612569809 +time 76.05 +62 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9445.275390625 +err_fin 8100.333984375 +sparsity check 0.399999737739563 +time 1.35 +62 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 84320.0 +err_fin 75629.84375 +sparsity check 0.399999737739563 +time 1.35 +62 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9772.4111328125 +err_fin 4984.01513671875 +sparsity check 0.3999999612569809 +time 68.92 +62 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2936760.0 +err_fin 2158155.0 +sparsity check 0.399999988930566 +time 139.20 +62 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2647007.5 +err_fin 1928893.0 +sparsity check 0.399999988930566 +time 139.46 +62 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 325873.9375 +err_fin 304662.15625 +sparsity check 0.399999988930566 +time 137.08 +63 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 28726.470703125 +err_fin 22369.49609375 +sparsity check 0.3999999612569809 +time 76.04 +63 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7172.8134765625 +err_fin 6141.5068359375 +sparsity check 0.399999737739563 +time 1.36 +63 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 65172.89453125 +err_fin 57633.515625 +sparsity check 0.399999737739563 +time 1.34 +63 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7923.49365234375 +err_fin 4491.5146484375 +sparsity check 0.3999999612569809 +time 68.91 +63 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3004869.5 +err_fin 2214143.0 +sparsity check 0.399999988930566 +time 139.28 +63 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2710337.0 +err_fin 1982892.25 +sparsity check 0.399999988930566 +time 139.29 +63 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 330092.8125 +err_fin 309002.21875 +sparsity check 0.399999988930566 +time 137.39 +64 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 54454.6015625 +err_fin 42228.97265625 +sparsity check 0.3999999612569809 +time 76.02 +64 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16738.142578125 +err_fin 14420.96484375 +sparsity check 0.399999737739563 +time 1.37 +64 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 95227.5859375 +err_fin 86672.1171875 +sparsity check 0.399999737739563 +time 1.36 +64 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 16644.689453125 +err_fin 8670.3564453125 +sparsity check 0.3999999612569809 +time 68.92 +64 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3067880.0 +err_fin 2264808.0 +sparsity check 0.399999988930566 +time 139.25 +64 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2776968.5 +err_fin 2035704.0 +sparsity check 0.399999988930566 +time 139.33 +64 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 335391.875 +err_fin 314201.5 +sparsity check 0.399999988930566 +time 137.21 +65 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14516.8232421875 +err_fin 11307.5546875 +sparsity check 0.3999999612569809 +time 76.05 +65 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 3333.04736328125 +err_fin 2781.57666015625 +sparsity check 0.399999737739563 +time 1.36 +65 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 52414.85546875 +err_fin 46795.5859375 +sparsity check 0.399999737739563 +time 1.35 +65 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6793.7373046875 +err_fin 3684.34521484375 +sparsity check 0.3999999612569809 +time 68.92 +65 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3121126.25 +err_fin 2312601.5 +sparsity check 0.399999988930566 +time 138.97 +65 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2839029.0 +err_fin 2089455.75 +sparsity check 0.399999988930566 +time 139.24 +65 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 341135.375 +err_fin 320050.1875 +sparsity check 0.399999988930566 +time 137.38 +66 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24849.8046875 +err_fin 19324.787109375 +sparsity check 0.3999999612569809 +time 76.07 +66 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 6448.6796875 +err_fin 5454.6904296875 +sparsity check 0.399999737739563 +time 1.36 +66 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69806.2421875 +err_fin 63193.6640625 +sparsity check 0.399999737739563 +time 1.34 +66 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14557.0380859375 +err_fin 8636.525390625 +sparsity check 0.3999999612569809 +time 68.94 +66 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3198090.5 +err_fin 2368442.5 +sparsity check 0.399999988930566 +time 139.30 +66 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2925024.5 +err_fin 2152401.0 +sparsity check 0.399999988930566 +time 139.71 +66 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 356939.4375 +err_fin 334807.625 +sparsity check 0.399999988930566 +time 137.26 +67 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11913.30859375 +err_fin 9233.849609375 +sparsity check 0.3999999612569809 +time 76.02 +67 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 2945.35595703125 +err_fin 2397.56005859375 +sparsity check 0.399999737739563 +time 1.36 +67 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27783.22265625 +err_fin 24270.81640625 +sparsity check 0.399999737739563 +time 1.35 +67 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4088.1806640625 +err_fin 1924.853515625 +sparsity check 0.3999999612569809 +time 68.91 +67 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3231225.0 +err_fin 2397659.0 +sparsity check 0.399999988930566 +time 139.37 +67 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2977180.75 +err_fin 2195135.0 +sparsity check 0.399999988930566 +time 139.31 +67 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 360001.875 +err_fin 337752.40625 +sparsity check 0.399999988930566 +time 137.40 +68 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104838.203125 +err_fin 81429.46875 +sparsity check 0.3999999612569809 +time 76.05 +68 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38504.1484375 +err_fin 33724.22265625 +sparsity check 0.399999737739563 +time 1.35 +68 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 137444.734375 +err_fin 125453.953125 +sparsity check 0.399999737739563 +time 1.34 +68 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 15373.6875 +err_fin 8905.60546875 +sparsity check 0.3999999612569809 +time 68.97 +68 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3348988.5 +err_fin 2488614.0 +sparsity check 0.399999988930566 +time 139.36 +68 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3100822.5 +err_fin 2292061.0 +sparsity check 0.399999988930566 +time 139.35 +68 mlp.down_proj +Pruning ... +0.39999998467309134 0.2499999850988388 0.32857141750199453 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253876941607 0.9709505944546686 1.0 +err_prefin 384532.09375 +err_fin 360141.5625 +sparsity check 0.39999998467309134 +time 137.33 +69 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 179347.78125 +err_fin 139475.03125 +sparsity check 0.3999999612569809 +time 76.02 +69 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86929.484375 +err_fin 76924.859375 +sparsity check 0.399999737739563 +time 1.54 +69 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 180458.5 +err_fin 162700.0 +sparsity check 0.399999737739563 +time 1.34 +69 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24603.837890625 +err_fin 13488.77734375 +sparsity check 0.3999999612569809 +time 68.91 +69 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3413958.25 +err_fin 2536731.5 +sparsity check 0.399999988930566 +time 139.29 +69 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3193894.5 +err_fin 2361396.0 +sparsity check 0.399999988930566 +time 139.77 +69 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 403397.5 +err_fin 378025.625 +sparsity check 0.399999988930566 +time 137.02 +70 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100971.796875 +err_fin 78724.796875 +sparsity check 0.3999999612569809 +time 76.07 +70 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38836.046875 +err_fin 34014.4296875 +sparsity check 0.399999737739563 +time 1.55 +70 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 102763.703125 +err_fin 92557.21875 +sparsity check 0.399999737739563 +time 1.34 +70 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 18146.841796875 +err_fin 9677.0751953125 +sparsity check 0.3999999612569809 +time 68.98 +70 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3491533.0 +err_fin 2594608.0 +sparsity check 0.399999988930566 +time 139.01 +70 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3285514.25 +err_fin 2429184.5 +sparsity check 0.399999988930566 +time 139.71 +70 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 430509.3125 +err_fin 402992.625 +sparsity check 0.399999988930566 +time 137.35 +71 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 139824.796875 +err_fin 108807.4765625 +sparsity check 0.3999999612569809 +time 76.05 +71 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 65574.125 +err_fin 57730.90234375 +sparsity check 0.399999737739563 +time 1.54 +71 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 110519.40625 +err_fin 99854.078125 +sparsity check 0.399999737739563 +time 1.35 +71 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 23332.802734375 +err_fin 12481.970703125 +sparsity check 0.3999999612569809 +time 68.95 +71 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3605251.5 +err_fin 2672655.5 +sparsity check 0.399999988930566 +time 139.33 +71 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3398419.25 +err_fin 2508336.25 +sparsity check 0.399999988930566 +time 139.78 +71 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 455241.75 +err_fin 425527.34375 +sparsity check 0.399999988930566 +time 137.26 +72 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 196818.25 +err_fin 152508.28125 +sparsity check 0.3999999612569809 +time 76.06 +72 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 92007.8046875 +err_fin 81369.671875 +sparsity check 0.399999737739563 +time 1.53 +72 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 129737.375 +err_fin 117728.75 +sparsity check 0.399999737739563 +time 1.34 +72 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 30920.36328125 +err_fin 18197.14453125 +sparsity check 0.3999999612569809 +time 68.97 +72 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3690917.25 +err_fin 2729482.5 +sparsity check 0.399999988930566 +time 139.01 +72 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3501261.0 +err_fin 2578744.0 +sparsity check 0.399999988930566 +time 139.68 +72 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 489137.3125 +err_fin 456619.71875 +sparsity check 0.399999988930566 +time 137.34 +73 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 182977.8125 +err_fin 141293.25 +sparsity check 0.3999999612569809 +time 76.10 +73 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 89972.203125 +err_fin 79496.8515625 +sparsity check 0.399999737739563 +time 1.53 +73 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 147110.46875 +err_fin 131562.28125 +sparsity check 0.399999737739563 +time 1.35 +73 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19759.52734375 +err_fin 11627.740234375 +sparsity check 0.3999999612569809 +time 68.95 +73 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3779088.5 +err_fin 2786089.25 +sparsity check 0.399999988930566 +time 139.34 +73 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3602361.0 +err_fin 2646019.0 +sparsity check 0.399999988930566 +time 139.38 +73 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 537911.375 +err_fin 499562.375 +sparsity check 0.399999988930566 +time 136.98 +74 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 166243.046875 +err_fin 127981.9765625 +sparsity check 0.3999999612569809 +time 76.05 +74 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75013.25 +err_fin 65175.484375 +sparsity check 0.399999737739563 +time 1.36 +74 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 111847.640625 +err_fin 99155.484375 +sparsity check 0.399999737739563 +time 1.33 +74 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 40018.34375 +err_fin 20523.7578125 +sparsity check 0.3999999612569809 +time 69.04 +74 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3866103.0 +err_fin 2820464.75 +sparsity check 0.399999988930566 +time 139.43 +74 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3698074.0 +err_fin 2687355.5 +sparsity check 0.399999988930566 +time 139.73 +74 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 592491.375 +err_fin 546775.8125 +sparsity check 0.399999988930566 +time 137.35 +75 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 168544.46875 +err_fin 128167.65625 +sparsity check 0.3999999612569809 +time 76.06 +75 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 74026.578125 +err_fin 63848.19921875 +sparsity check 0.399999737739563 +time 1.35 +75 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 126137.78125 +err_fin 110095.375 +sparsity check 0.399999737739563 +time 1.35 +75 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36850.890625 +err_fin 19020.365234375 +sparsity check 0.3999999612569809 +time 68.95 +75 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3900661.0 +err_fin 2822843.0 +sparsity check 0.399999988930566 +time 139.36 +75 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3724726.5 +err_fin 2687188.0 +sparsity check 0.399999988930566 +time 139.40 +75 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 667397.5625 +err_fin 608322.125 +sparsity check 0.399999988930566 +time 137.07 +76 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 253750.90625 +err_fin 189531.53125 +sparsity check 0.3999999612569809 +time 76.02 +76 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 107933.15625 +err_fin 91723.34375 +sparsity check 0.399999737739563 +time 1.34 +76 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 202030.109375 +err_fin 173529.890625 +sparsity check 0.399999737739563 +time 1.33 +76 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 123403.4453125 +err_fin 70644.203125 +sparsity check 0.3999999612569809 +time 68.98 +76 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3866278.5 +err_fin 2741640.25 +sparsity check 0.399999988930566 +time 139.36 +76 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3663346.75 +err_fin 2588183.5 +sparsity check 0.399999988930566 +time 139.71 +76 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 803513.4375 +err_fin 713280.9375 +sparsity check 0.399999988930566 +time 137.34 +77 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 167097.28125 +err_fin 121912.25 +sparsity check 0.3999999612569809 +time 76.05 +77 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77177.921875 +err_fin 64188.6796875 +sparsity check 0.399999737739563 +time 1.55 +77 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 134594.03125 +err_fin 114464.7890625 +sparsity check 0.399999737739563 +time 1.34 +77 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 54597.484375 +err_fin 24432.23046875 +sparsity check 0.3999999612569809 +time 68.95 +77 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3596767.5 +err_fin 2479884.0 +sparsity check 0.399999988930566 +time 139.44 +77 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3412120.5 +err_fin 2341643.5 +sparsity check 0.399999988930566 +time 139.41 +77 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 920975.75 +err_fin 788917.125 +sparsity check 0.399999988930566 +time 137.28 +78 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 149312.1875 +err_fin 104054.328125 +sparsity check 0.3999999612569809 +time 76.06 +78 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 73333.828125 +err_fin 58532.171875 +sparsity check 0.399999737739563 +time 1.35 +78 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 119455.03125 +err_fin 100930.953125 +sparsity check 0.399999737739563 +time 1.34 +78 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 33042.1171875 +err_fin 16012.80078125 +sparsity check 0.3999999612569809 +time 69.15 +78 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3002931.5 +err_fin 1987053.25 +sparsity check 0.399999988930566 +time 139.52 +78 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2814261.0 +err_fin 1855245.25 +sparsity check 0.399999988930566 +time 139.48 +78 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 976734.5 +err_fin 781829.0 +sparsity check 0.399999988930566 +time 137.12 +79 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 93958.484375 +err_fin 60492.56640625 +sparsity check 0.3999999612569809 +time 76.09 +79 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 57846.0703125 +err_fin 44838.82421875 +sparsity check 0.399999737739563 +time 1.36 +79 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 52594.4296875 +err_fin 41885.23828125 +sparsity check 0.399999737739563 +time 1.35 +79 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9301.072265625 +err_fin 3125.864990234375 +sparsity check 0.3999999612569809 +time 68.94 +79 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1829445.375 +err_fin 1109003.875 +sparsity check 0.399999988930566 +time 139.53 +79 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1672239.25 +err_fin 1014265.0 +sparsity check 0.399999988930566 +time 139.53 +79 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 825539.125 +err_fin 553685.25 +sparsity check 0.399999988930566 +time 137.52 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0106) +model.layers.0.self_attn.k_proj.weight tensor(0.0180) +model.layers.0.self_attn.v_proj.weight tensor(0.0582) +model.layers.0.self_attn.o_proj.weight tensor(3.6061e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0088) +51508.91003680229 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.794084 diff --git a/logs/llama2-70-0.6-fix-mask b/logs/llama2-70-0.6-fix-mask new file mode 100644 index 0000000..dcd6773 --- /dev/null +++ b/logs/llama2-70-0.6-fix-mask @@ -0,0 +1,4020 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +model.layers.0.self_attn.q_proj.weight torch.Size([8192, 8192]) (8192, 8192) 0.1 +model.layers.0.self_attn.k_proj.weight torch.Size([1024, 8192]) (1024, 8192) 0.2 +model.layers.0.mlp.gate_proj.weight torch.Size([28672, 8192]) (8192, 28672) 0.2 +Ready. +0 self_attn.q_proj +Pruning ... +0.3900318294763565 0.09003184735774994 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3178669878329639 0.9709505944546686 1.0 +err_prefin 67.93980407714844 +err_fin 17.388469696044922 +sparsity check 0.3900318294763565 +time 75.10 +0 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 84.05683898925781 +err_fin 22.081422805786133 +sparsity check 0.39999985694885254 +time 1.33 +0 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 8.222892761230469 +err_fin 4.271332740783691 +sparsity check 0.39999985694885254 +time 1.31 +0 self_attn.o_proj +Pruning ... +0.3798350691795349 0.07983508706092834 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2828887199008383 0.9709505944546686 1.0 +err_prefin 3.1282148361206055 +err_fin 0.16622190177440643 +sparsity check 0.3798350691795349 +time 67.96 +0 mlp.gate_proj +Pruning ... +0.3999928278582437 0.1999748945236206 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337774124002955 0.9709505944546686 1.0 +err_prefin 1111.20751953125 +err_fin 339.8477783203125 +sparsity check 0.3999928278582437 +time 137.86 +0 mlp.up_proj +Pruning ... +0.3999928278582437 0.1999748945236206 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337774124002955 0.9709505944546686 1.0 +err_prefin 1123.2789306640625 +err_fin 339.15533447265625 +sparsity check 0.3999928278582437 +time 138.22 +0 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 22.08213233947754 +err_fin 9.71038818359375 +sparsity check 0.3999999931880406 +time 135.68 +1 self_attn.q_proj +Pruning ... +0.38066042959690094 0.08066044747829437 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2857929308096117 0.9709505944546686 1.0 +err_prefin 937.7511596679688 +err_fin 194.9318084716797 +sparsity check 0.38066042959690094 +time 75.08 +1 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 973.091064453125 +err_fin 250.7093505859375 +sparsity check 0.39999985694885254 +time 1.32 +1 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 56.17710876464844 +err_fin 28.279510498046875 +sparsity check 0.39999985694885254 +time 1.33 +1 self_attn.o_proj +Pruning ... +0.3773004561662674 0.07730047404766083 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2738859507762847 0.9709505944546686 1.0 +err_prefin 59.44793701171875 +err_fin 9.102714538574219 +sparsity check 0.3773004561662674 +time 67.95 +1 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 8944.666015625 +err_fin 2393.19287109375 +sparsity check 0.3999999931880406 +time 137.87 +1 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 9889.7255859375 +err_fin 2553.67919921875 +sparsity check 0.3999999931880406 +time 138.10 +1 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 369.8732604980469 +err_fin 291.06585693359375 +sparsity check 0.3999999931880406 +time 135.76 +2 self_attn.q_proj +Pruning ... +0.39730459451675415 0.09730461239814758 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.341683593222438 0.9709505944546686 1.0 +err_prefin 3279.197265625 +err_fin 1069.7049560546875 +sparsity check 0.39730459451675415 +time 75.04 +2 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 4669.1806640625 +err_fin 1998.608154296875 +sparsity check 0.39999985694885254 +time 1.31 +2 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 277.66925048828125 +err_fin 176.12515258789062 +sparsity check 0.39999985694885254 +time 1.30 +2 self_attn.o_proj +Pruning ... +0.39454004168510437 0.0945400595664978 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3327359593366572 0.9709505944546686 1.0 +err_prefin 300.2970275878906 +err_fin 87.72624206542969 +sparsity check 0.39454004168510437 +time 67.93 +2 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 35212.859375 +err_fin 13353.748046875 +sparsity check 0.3999999931880406 +time 137.86 +2 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 36606.16015625 +err_fin 13565.90234375 +sparsity check 0.3999999931880406 +time 138.25 +2 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1367.951416015625 +err_fin 1155.0523681640625 +sparsity check 0.3999999931880406 +time 135.80 +3 self_attn.q_proj +Pruning ... +0.39995184540748596 0.0999518632888794 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3501338626233381 0.9709505944546686 1.0 +err_prefin 29859.439453125 +err_fin 13811.650390625 +sparsity check 0.39995184540748596 +time 74.96 +3 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 20594.76953125 +err_fin 12368.8828125 +sparsity check 0.39999985694885254 +time 1.33 +3 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 3717.0654296875 +err_fin 2571.700439453125 +sparsity check 0.39999985694885254 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.39858680963516235 0.09858682751655579 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.345790746317014 0.9709505944546686 1.0 +err_prefin 584.930419921875 +err_fin 247.90924072265625 +sparsity check 0.39858680963516235 +time 67.84 +3 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 77081.234375 +err_fin 36747.4921875 +sparsity check 0.3999999931880406 +time 137.80 +3 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 77310.25 +err_fin 36238.3203125 +sparsity check 0.3999999931880406 +time 138.07 +3 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1846.95849609375 +err_fin 1632.4097900390625 +sparsity check 0.3999999931880406 +time 135.72 +4 self_attn.q_proj +Pruning ... +0.3999020755290985 0.09990209341049194 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.349976037564583 0.9709505944546686 1.0 +err_prefin 57675.33984375 +err_fin 29864.189453125 +sparsity check 0.3999020755290985 +time 74.97 +4 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 26593.50390625 +err_fin 16409.076171875 +sparsity check 0.39999985694885254 +time 1.33 +4 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 5928.640625 +err_fin 4469.638671875 +sparsity check 0.39999985694885254 +time 1.31 +4 self_attn.o_proj +Pruning ... +0.3970344215631485 0.09703443944454193 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3408147494946228 0.9709505944546686 1.0 +err_prefin 719.8106689453125 +err_fin 330.6018981933594 +sparsity check 0.3970344215631485 +time 67.85 +4 mlp.gate_proj +Pruning ... +0.39997968077659607 0.199928879737854 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337511125328974 0.9709505944546686 1.0 +err_prefin 126231.765625 +err_fin 68969.1875 +sparsity check 0.39997968077659607 +time 137.80 +4 mlp.up_proj +Pruning ... +0.3999303664479937 0.19975627958774567 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1336524138544812 0.9709505944546686 1.0 +err_prefin 123856.71875 +err_fin 66862.328125 +sparsity check 0.3999303664479937 +time 138.20 +4 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2812.97265625 +err_fin 2526.6923828125 +sparsity check 0.3999999931880406 +time 135.79 +5 self_attn.q_proj +Pruning ... +0.39997562766075134 0.09997564554214478 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502092644027002 0.9709505944546686 1.0 +err_prefin 91248.796875 +err_fin 52015.40625 +sparsity check 0.39997562766075134 +time 74.95 +5 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 39969.01953125 +err_fin 27353.34375 +sparsity check 0.39999985694885254 +time 1.34 +5 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 8938.1171875 +err_fin 6987.9599609375 +sparsity check 0.39999985694885254 +time 1.31 +5 self_attn.o_proj +Pruning ... +0.3990519344806671 0.09905195236206055 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3472740069065536 0.9709505944546686 1.0 +err_prefin 1242.98095703125 +err_fin 540.0116577148438 +sparsity check 0.3990519344806671 +time 67.85 +5 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 158335.46875 +err_fin 92801.5 +sparsity check 0.3999999931880406 +time 137.82 +5 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 154464.203125 +err_fin 89651.2734375 +sparsity check 0.3999999931880406 +time 138.07 +5 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3972.828857421875 +err_fin 3539.4873046875 +sparsity check 0.3999999931880406 +time 135.71 +6 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 103943.5625 +err_fin 62416.046875 +sparsity check 0.3999880701303482 +time 74.96 +6 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 43369.578125 +err_fin 31196.71875 +sparsity check 0.39999985694885254 +time 1.34 +6 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 10240.4765625 +err_fin 8313.4150390625 +sparsity check 0.39999985694885254 +time 1.30 +6 self_attn.o_proj +Pruning ... +0.3946770429611206 0.09467706084251404 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3331823778796275 0.9709505944546686 1.0 +err_prefin 2077.16015625 +err_fin 1079.2830810546875 +sparsity check 0.3946770429611206 +time 67.88 +6 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 210390.25 +err_fin 128369.984375 +sparsity check 0.3999999931880406 +time 137.79 +6 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 203740.015625 +err_fin 123401.203125 +sparsity check 0.3999999931880406 +time 138.22 +6 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5852.21826171875 +err_fin 5284.48291015625 +sparsity check 0.3999999931880406 +time 135.82 +7 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 172368.59375 +err_fin 109349.6171875 +sparsity check 0.3999880701303482 +time 74.94 +7 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 59500.1796875 +err_fin 45512.26953125 +sparsity check 0.39999985694885254 +time 1.35 +7 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 14638.193359375 +err_fin 12277.2529296875 +sparsity check 0.39999985694885254 +time 1.31 +7 self_attn.o_proj +Pruning ... +0.3913321793079376 0.09133219718933105 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3221918325783726 0.9709505944546686 1.0 +err_prefin 2677.06298828125 +err_fin 1313.254150390625 +sparsity check 0.3913321793079376 +time 67.90 +7 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 259888.875 +err_fin 164794.28125 +sparsity check 0.3999999931880406 +time 137.80 +7 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 251294.296875 +err_fin 158165.375 +sparsity check 0.3999999931880406 +time 138.07 +7 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 8274.5498046875 +err_fin 7470.8671875 +sparsity check 0.3999999931880406 +time 135.71 +8 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 151063.484375 +err_fin 98389.7734375 +sparsity check 0.3999880701303482 +time 74.97 +8 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 58661.2265625 +err_fin 43408.1328125 +sparsity check 0.39999985694885254 +time 1.32 +8 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 13763.2666015625 +err_fin 11586.0107421875 +sparsity check 0.39999985694885254 +time 1.30 +8 self_attn.o_proj +Pruning ... +0.39467422664165497 0.0946742445230484 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3331732040868158 0.9709505944546686 1.0 +err_prefin 4621.22314453125 +err_fin 2204.10498046875 +sparsity check 0.39467422664165497 +time 67.85 +8 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 357495.0 +err_fin 235801.171875 +sparsity check 0.3999999931880406 +time 137.81 +8 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 338663.09375 +err_fin 221967.1875 +sparsity check 0.3999999931880406 +time 138.20 +8 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 16046.419921875 +err_fin 14547.4453125 +sparsity check 0.3999999931880406 +time 135.80 +9 self_attn.q_proj +Pruning ... +0.39997535943984985 0.09997537732124329 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.350208414057257 0.9709505944546686 1.0 +err_prefin 165287.0625 +err_fin 115790.078125 +sparsity check 0.39997535943984985 +time 74.96 +9 self_attn.k_proj +Pruning ... +0.399999737739563 0.19999980926513672 0.3749997615814209 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446747913956842 0.9709505944546686 1.0 +err_prefin 57355.265625 +err_fin 46549.86328125 +sparsity check 0.399999737739563 +time 1.32 +9 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 20768.37109375 +err_fin 18328.796875 +sparsity check 0.39999985694885254 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.39449165761470795 0.09449167549610138 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.33257822431971 0.9709505944546686 1.0 +err_prefin 2547.7529296875 +err_fin 1121.1663818359375 +sparsity check 0.39449165761470795 +time 67.82 +9 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 455587.1875 +err_fin 311802.75 +sparsity check 0.3999999931880406 +time 137.78 +9 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 428959.15625 +err_fin 291601.25 +sparsity check 0.3999999931880406 +time 138.02 +9 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 14794.69921875 +err_fin 13578.59765625 +sparsity check 0.3999999931880406 +time 135.67 +10 self_attn.q_proj +Pruning ... +0.39997535943984985 0.09997537732124329 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.350208414057257 0.9709505944546686 1.0 +err_prefin 99369.4609375 +err_fin 70449.3203125 +sparsity check 0.39997535943984985 +time 74.94 +10 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 39679.23828125 +err_fin 30677.9765625 +sparsity check 0.39999985694885254 +time 1.35 +10 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 20211.69921875 +err_fin 17682.802734375 +sparsity check 0.39999985694885254 +time 1.30 +10 self_attn.o_proj +Pruning ... +0.38530606031417847 0.0853060781955719 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3018958142505257 0.9709505944546686 1.0 +err_prefin 1716.8173828125 +err_fin 816.5371704101562 +sparsity check 0.38530606031417847 +time 67.90 +10 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 548887.8125 +err_fin 392840.125 +sparsity check 0.3999999931880406 +time 137.79 +10 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 515091.375 +err_fin 366501.40625 +sparsity check 0.3999999931880406 +time 138.07 +10 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 16544.64453125 +err_fin 15337.5859375 +sparsity check 0.3999999931880406 +time 135.68 +11 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 159820.9375 +err_fin 113646.6875 +sparsity check 0.39998745918273926 +time 74.96 +11 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 64640.4375 +err_fin 49469.5078125 +sparsity check 0.39999985694885254 +time 1.34 +11 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 24035.24609375 +err_fin 20733.14453125 +sparsity check 0.39999985694885254 +time 1.32 +11 self_attn.o_proj +Pruning ... +0.38262051343917847 0.0826205313205719 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.292637027454332 0.9709505944546686 1.0 +err_prefin 2840.35986328125 +err_fin 1312.10595703125 +sparsity check 0.38262051343917847 +time 67.88 +11 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 589919.125 +err_fin 429187.125 +sparsity check 0.3999999931880406 +time 137.77 +11 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 556431.3125 +err_fin 402648.0 +sparsity check 0.3999999931880406 +time 138.06 +11 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 18161.0546875 +err_fin 16874.15234375 +sparsity check 0.3999999931880406 +time 135.65 +12 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 134002.015625 +err_fin 95821.796875 +sparsity check 0.39998745918273926 +time 74.95 +12 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 58485.625 +err_fin 44912.3515625 +sparsity check 0.39999985694885254 +time 1.32 +12 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 18154.234375 +err_fin 15332.16796875 +sparsity check 0.39999985694885254 +time 1.32 +12 self_attn.o_proj +Pruning ... +0.3908897191286087 0.09088973701000214 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3207235687633965 0.9709505944546686 1.0 +err_prefin 3292.9931640625 +err_fin 1473.5245361328125 +sparsity check 0.3908897191286087 +time 67.86 +12 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 583987.1875 +err_fin 426351.125 +sparsity check 0.3999999931880406 +time 137.79 +12 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 556522.8125 +err_fin 404676.78125 +sparsity check 0.3999999931880406 +time 138.09 +12 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 20344.2109375 +err_fin 18797.353515625 +sparsity check 0.3999999931880406 +time 135.66 +13 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 178159.578125 +err_fin 128332.265625 +sparsity check 0.39998745918273926 +time 74.93 +13 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 65993.390625 +err_fin 51519.92578125 +sparsity check 0.39999985694885254 +time 1.35 +13 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 24204.1953125 +err_fin 20978.1875 +sparsity check 0.39999985694885254 +time 1.32 +13 self_attn.o_proj +Pruning ... +0.3840336948633194 0.08403371274471283 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2975260219707532 0.9709505944546686 1.0 +err_prefin 8226.1689453125 +err_fin 4241.2490234375 +sparsity check 0.3840336948633194 +time 67.85 +13 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 621849.8125 +err_fin 446843.34375 +sparsity check 0.3999999931880406 +time 137.76 +13 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 598487.125 +err_fin 428082.15625 +sparsity check 0.3999999931880406 +time 138.06 +13 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 25953.232421875 +err_fin 23996.412109375 +sparsity check 0.3999999931880406 +time 135.68 +14 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 291831.1875 +err_fin 212318.5625 +sparsity check 0.39998745918273926 +time 74.96 +14 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 96605.0 +err_fin 77073.796875 +sparsity check 0.39999985694885254 +time 1.34 +14 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 34245.859375 +err_fin 30060.234375 +sparsity check 0.39999985694885254 +time 1.31 +14 self_attn.o_proj +Pruning ... +0.3914257138967514 0.09142573177814484 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.322501782332778 0.9709505944546686 1.0 +err_prefin 10807.0009765625 +err_fin 5231.7880859375 +sparsity check 0.3914257138967514 +time 67.84 +14 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 716520.875 +err_fin 529446.0 +sparsity check 0.3999999931880406 +time 137.80 +14 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 689797.25 +err_fin 507253.46875 +sparsity check 0.3999999931880406 +time 138.19 +14 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 27627.48046875 +err_fin 25726.07421875 +sparsity check 0.3999999931880406 +time 135.63 +15 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 214802.015625 +err_fin 160035.40625 +sparsity check 0.39998745918273926 +time 74.92 +15 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 72588.5859375 +err_fin 58471.12109375 +sparsity check 0.39999985694885254 +time 1.33 +15 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 40562.40625 +err_fin 36233.296875 +sparsity check 0.39999985694885254 +time 1.32 +15 self_attn.o_proj +Pruning ... +0.38133588433265686 0.08133590221405029 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2881597988885451 0.9709505944546686 1.0 +err_prefin 8441.4765625 +err_fin 4586.09619140625 +sparsity check 0.38133588433265686 +time 67.87 +15 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 772686.375 +err_fin 576230.125 +sparsity check 0.3999999931880406 +time 137.78 +15 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 746677.375 +err_fin 554146.5625 +sparsity check 0.3999999931880406 +time 138.06 +15 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 31399.396484375 +err_fin 29217.23828125 +sparsity check 0.3999999931880406 +time 135.68 +16 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 208473.703125 +err_fin 157294.0625 +sparsity check 0.39998745918273926 +time 74.95 +16 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 79562.640625 +err_fin 64505.265625 +sparsity check 0.39999985694885254 +time 1.34 +16 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 37636.8671875 +err_fin 33756.921875 +sparsity check 0.39999985694885254 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.3909340649843216 0.09093408286571503 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3208708803257494 0.9709505944546686 1.0 +err_prefin 8207.4296875 +err_fin 4186.65625 +sparsity check 0.3909340649843216 +time 67.84 +16 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 809634.875 +err_fin 610233.625 +sparsity check 0.3999999931880406 +time 137.77 +16 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 785285.875 +err_fin 589506.125 +sparsity check 0.3999999931880406 +time 138.07 +16 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 32459.40234375 +err_fin 30296.30078125 +sparsity check 0.3999999931880406 +time 135.68 +17 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 267344.5 +err_fin 199147.96875 +sparsity check 0.39998745918273926 +time 74.95 +17 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 95325.3203125 +err_fin 75927.9921875 +sparsity check 0.39999985694885254 +time 1.33 +17 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 33506.296875 +err_fin 29167.193359375 +sparsity check 0.39999985694885254 +time 1.32 +17 self_attn.o_proj +Pruning ... +0.39738740026950836 0.0973874181509018 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3419496463900018 0.9709505944546686 1.0 +err_prefin 13549.751953125 +err_fin 6180.005859375 +sparsity check 0.39738740026950836 +time 67.86 +17 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 804724.1875 +err_fin 598717.0625 +sparsity check 0.3999999931880406 +time 137.78 +17 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 782598.375 +err_fin 579610.25 +sparsity check 0.3999999931880406 +time 138.09 +17 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 40987.078125 +err_fin 37530.9296875 +sparsity check 0.3999999931880406 +time 135.67 +18 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 253699.828125 +err_fin 190201.734375 +sparsity check 0.39998745918273926 +time 74.94 +18 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 87223.015625 +err_fin 71011.21875 +sparsity check 0.39999985694885254 +time 1.34 +18 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 50740.96875 +err_fin 45821.36328125 +sparsity check 0.39999985694885254 +time 1.31 +18 self_attn.o_proj +Pruning ... +0.3879019021987915 0.08790192008018494 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3107185316560104 0.9709505944546686 1.0 +err_prefin 14054.927734375 +err_fin 6564.66162109375 +sparsity check 0.3879019021987915 +time 67.86 +18 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 884590.0625 +err_fin 660727.25 +sparsity check 0.3999999931880406 +time 137.76 +18 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 854321.8125 +err_fin 635184.0 +sparsity check 0.3999999931880406 +time 138.15 +18 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 45267.578125 +err_fin 41552.92578125 +sparsity check 0.3999999931880406 +time 135.60 +19 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 297510.625 +err_fin 226424.546875 +sparsity check 0.39998745918273926 +time 74.94 +19 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 93994.25 +err_fin 79128.078125 +sparsity check 0.39999985694885254 +time 1.31 +19 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 59418.61328125 +err_fin 54665.296875 +sparsity check 0.39999985694885254 +time 1.31 +19 self_attn.o_proj +Pruning ... +0.38000810146331787 0.0800081193447113 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.283498675942924 0.9709505944546686 1.0 +err_prefin 9839.859375 +err_fin 5089.5126953125 +sparsity check 0.38000810146331787 +time 67.90 +19 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 959164.4375 +err_fin 721920.1875 +sparsity check 0.3999999931880406 +time 137.78 +19 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 927413.125 +err_fin 695242.5 +sparsity check 0.3999999931880406 +time 137.98 +19 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 46537.91796875 +err_fin 43178.7578125 +sparsity check 0.3999999931880406 +time 135.66 +20 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 262611.65625 +err_fin 202144.71875 +sparsity check 0.39998745918273926 +time 74.95 +20 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 85072.703125 +err_fin 70396.765625 +sparsity check 0.39999985694885254 +time 1.35 +20 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 48525.015625 +err_fin 44793.02734375 +sparsity check 0.39999985694885254 +time 1.32 +20 self_attn.o_proj +Pruning ... +0.38832084834575653 0.08832086622714996 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3121309911892807 0.9709505944546686 1.0 +err_prefin 8732.67578125 +err_fin 3871.71484375 +sparsity check 0.38832084834575653 +time 67.87 +20 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1027752.75 +err_fin 777022.5 +sparsity check 0.39999318974358694 +time 137.78 +20 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 994249.375 +err_fin 748517.75 +sparsity check 0.39999318974358694 +time 138.02 +20 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 47753.5234375 +err_fin 44481.6484375 +sparsity check 0.3999999931880406 +time 135.76 +21 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 166584.8125 +err_fin 127685.875 +sparsity check 0.39998745918273926 +time 74.95 +21 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 60790.12890625 +err_fin 49373.1796875 +sparsity check 0.39999985694885254 +time 1.32 +21 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 42025.9140625 +err_fin 38236.7421875 +sparsity check 0.39999985694885254 +time 1.31 +21 self_attn.o_proj +Pruning ... +0.3720995783805847 0.07209959626197815 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2550014079986151 0.9709505944546686 1.0 +err_prefin 10183.21875 +err_fin 5230.98681640625 +sparsity check 0.3720995783805847 +time 67.90 +21 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1073376.125 +err_fin 813125.625 +sparsity check 0.39999318974358694 +time 137.77 +21 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1042929.4375 +err_fin 787203.875 +sparsity check 0.3999999931880406 +time 138.03 +21 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 49620.4609375 +err_fin 46504.046875 +sparsity check 0.3999999931880406 +time 135.70 +22 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 158965.96875 +err_fin 122928.96875 +sparsity check 0.39998745918273926 +time 75.08 +22 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 66577.109375 +err_fin 56351.9765625 +sparsity check 0.39999985694885254 +time 1.34 +22 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 47186.84375 +err_fin 43472.34375 +sparsity check 0.39999985694885254 +time 1.31 +22 self_attn.o_proj +Pruning ... +0.39435090124607086 0.0943509191274643 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.332119125472264 0.9709505944546686 1.0 +err_prefin 7203.67578125 +err_fin 3286.880859375 +sparsity check 0.39435090124607086 +time 67.96 +22 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1123434.375 +err_fin 853420.625 +sparsity check 0.39999318974358694 +time 137.99 +22 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1094003.25 +err_fin 828283.125 +sparsity check 0.39999318974358694 +time 138.56 +22 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 50516.125 +err_fin 47532.46875 +sparsity check 0.3999999931880406 +time 135.68 +23 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 229021.375 +err_fin 175342.1875 +sparsity check 0.39998745918273926 +time 75.01 +23 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91112.59375 +err_fin 75837.8359375 +sparsity check 0.39999985694885254 +time 1.33 +23 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 50323.359375 +err_fin 46203.8828125 +sparsity check 0.39999985694885254 +time 1.31 +23 self_attn.o_proj +Pruning ... +0.388177290558815 0.08817730844020844 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3116473469240155 0.9709505944546686 1.0 +err_prefin 10403.603515625 +err_fin 5000.8935546875 +sparsity check 0.388177290558815 +time 67.92 +23 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1150988.75 +err_fin 876287.0625 +sparsity check 0.39999318974358694 +time 138.11 +23 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1123824.5 +err_fin 852309.5625 +sparsity check 0.39999318974358694 +time 138.39 +23 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 52224.23828125 +err_fin 48965.41015625 +sparsity check 0.3999999931880406 +time 135.98 +24 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 203358.765625 +err_fin 153987.578125 +sparsity check 0.39998745918273926 +time 75.00 +24 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 80287.296875 +err_fin 65322.984375 +sparsity check 0.39999985694885254 +time 1.32 +24 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 38595.3125 +err_fin 34314.96484375 +sparsity check 0.39999985694885254 +time 1.31 +24 self_attn.o_proj +Pruning ... +0.3913518041372299 0.09135182201862335 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.322256876856434 0.9709505944546686 1.0 +err_prefin 9357.115234375 +err_fin 4763.25439453125 +sparsity check 0.3913518041372299 +time 67.88 +24 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1118270.0 +err_fin 854237.875 +sparsity check 0.3999999931880406 +time 138.05 +24 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1101718.75 +err_fin 839207.4375 +sparsity check 0.3999999931880406 +time 138.47 +24 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 52586.859375 +err_fin 49340.09765625 +sparsity check 0.3999999931880406 +time 135.86 +25 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 236197.78125 +err_fin 179507.046875 +sparsity check 0.39998745918273926 +time 75.00 +25 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91753.109375 +err_fin 74472.09375 +sparsity check 0.39999985694885254 +time 1.35 +25 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 40073.37109375 +err_fin 35583.90234375 +sparsity check 0.39999985694885254 +time 1.31 +25 self_attn.o_proj +Pruning ... +0.3883741497993469 0.08837416768074036 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3123104690436524 0.9709505944546686 1.0 +err_prefin 21641.259765625 +err_fin 11300.3251953125 +sparsity check 0.3883741497993469 +time 67.91 +25 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1072256.875 +err_fin 797672.1875 +sparsity check 0.3999999931880406 +time 138.07 +25 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1065880.375 +err_fin 789996.25 +sparsity check 0.3999999931880406 +time 138.31 +25 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 72162.203125 +err_fin 66710.90625 +sparsity check 0.3999999931880406 +time 135.91 +26 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 345840.21875 +err_fin 262721.625 +sparsity check 0.39998745918273926 +time 74.99 +26 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 117953.203125 +err_fin 98765.1875 +sparsity check 0.39999985694885254 +time 1.32 +26 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 61456.359375 +err_fin 55078.0625 +sparsity check 0.39999985694885254 +time 1.31 +26 self_attn.o_proj +Pruning ... +0.39200539886951447 0.0920054167509079 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3244193268192328 0.9709505944546686 1.0 +err_prefin 25394.7890625 +err_fin 13353.552734375 +sparsity check 0.39200539886951447 +time 67.88 +26 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1216229.375 +err_fin 914831.125 +sparsity check 0.39999318974358694 +time 138.07 +26 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1202893.0 +err_fin 901367.75 +sparsity check 0.39999318974358694 +time 138.37 +26 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 71759.484375 +err_fin 67622.875 +sparsity check 0.3999999931880406 +time 135.96 +27 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 157163.765625 +err_fin 121445.734375 +sparsity check 0.39998745918273926 +time 74.97 +27 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 68836.203125 +err_fin 57562.640625 +sparsity check 0.39999985694885254 +time 1.35 +27 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 50982.28515625 +err_fin 47113.44921875 +sparsity check 0.39999985694885254 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.39123478531837463 0.09123480319976807 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3218689321241066 0.9709505944546686 1.0 +err_prefin 10619.111328125 +err_fin 4833.744140625 +sparsity check 0.39123478531837463 +time 67.90 +27 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1286122.75 +err_fin 972871.5 +sparsity check 0.39999318974358694 +time 138.01 +27 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1268991.125 +err_fin 955722.75 +sparsity check 0.39999318974358694 +time 138.30 +27 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 75729.375 +err_fin 71600.2578125 +sparsity check 0.3999999931880406 +time 135.90 +28 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 285951.84375 +err_fin 220914.515625 +sparsity check 0.39998745918273926 +time 74.97 +28 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 106908.3359375 +err_fin 92089.265625 +sparsity check 0.39999985694885254 +time 1.33 +28 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 78111.703125 +err_fin 72002.25 +sparsity check 0.39999985694885254 +time 1.31 +28 self_attn.o_proj +Pruning ... +0.38880935311317444 0.08880937099456787 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3137739943616542 0.9709505944546686 1.0 +err_prefin 18052.15625 +err_fin 9518.7861328125 +sparsity check 0.38880935311317444 +time 67.91 +28 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1357190.625 +err_fin 1033252.6875 +sparsity check 0.39999318974358694 +time 137.84 +28 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1338597.375 +err_fin 1014937.4375 +sparsity check 0.39999318974358694 +time 138.04 +28 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 80249.2890625 +err_fin 76092.71875 +sparsity check 0.3999999931880406 +time 135.74 +29 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 283216.46875 +err_fin 219701.96875 +sparsity check 0.39998745918273926 +time 74.99 +29 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 100867.9296875 +err_fin 85445.09375 +sparsity check 0.39999985694885254 +time 1.32 +29 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 83404.8984375 +err_fin 77111.921875 +sparsity check 0.39999985694885254 +time 1.32 +29 self_attn.o_proj +Pruning ... +0.38364720344543457 0.083647221326828 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2961926723983255 0.9709505944546686 1.0 +err_prefin 18665.7265625 +err_fin 10149.21484375 +sparsity check 0.38364720344543457 +time 67.86 +29 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1436191.25 +err_fin 1099280.125 +sparsity check 0.39999318974358694 +time 137.85 +29 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1411887.5 +err_fin 1076554.625 +sparsity check 0.39999318974358694 +time 138.10 +29 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 84264.09375 +err_fin 80249.1875 +sparsity check 0.3999999931880406 +time 135.73 +30 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 147917.53125 +err_fin 115567.9921875 +sparsity check 0.39998745918273926 +time 74.99 +30 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 53050.2109375 +err_fin 45327.39453125 +sparsity check 0.39999985694885254 +time 1.34 +30 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 75653.828125 +err_fin 70534.90625 +sparsity check 0.39999985694885254 +time 1.32 +30 self_attn.o_proj +Pruning ... +0.3966815769672394 0.09668159484863281 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3396782334429505 0.9709505944546686 1.0 +err_prefin 9131.25390625 +err_fin 4687.3447265625 +sparsity check 0.3966815769672394 +time 67.88 +30 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1520840.0 +err_fin 1170527.75 +sparsity check 0.39999318974358694 +time 137.83 +30 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1489470.5 +err_fin 1141605.0 +sparsity check 0.39999318974358694 +time 138.03 +30 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 87122.390625 +err_fin 83041.5859375 +sparsity check 0.3999999931880406 +time 135.72 +31 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 300016.6875 +err_fin 233821.0625 +sparsity check 0.39998745918273926 +time 74.96 +31 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 101753.1328125 +err_fin 87469.875 +sparsity check 0.39999985694885254 +time 1.31 +31 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 83331.7890625 +err_fin 75658.828125 +sparsity check 0.39999985694885254 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.3851250559091568 0.08512507379055023 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3012760111818993 0.9709505944546686 1.0 +err_prefin 18780.552734375 +err_fin 10706.595703125 +sparsity check 0.3851250559091568 +time 67.86 +31 mlp.gate_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1585097.625 +err_fin 1230850.0 +sparsity check 0.39999318974358694 +time 137.81 +31 mlp.up_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1549287.75 +err_fin 1198593.0 +sparsity check 0.3999858626297542 +time 138.09 +31 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 88640.484375 +err_fin 84655.578125 +sparsity check 0.3999999931880406 +time 135.74 +32 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 334009.03125 +err_fin 262031.40625 +sparsity check 0.39998745918273926 +time 74.97 +32 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 108645.09375 +err_fin 94478.96875 +sparsity check 0.39999985694885254 +time 1.33 +32 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 74391.625 +err_fin 69028.0625 +sparsity check 0.39999985694885254 +time 1.31 +32 self_attn.o_proj +Pruning ... +0.39311298727989197 0.0931130051612854 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3280670181258327 0.9709505944546686 1.0 +err_prefin 13726.9619140625 +err_fin 7054.3671875 +sparsity check 0.39311298727989197 +time 67.89 +32 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1625557.375 +err_fin 1258778.0 +sparsity check 0.3999999931880406 +time 137.82 +32 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1591531.75 +err_fin 1228276.25 +sparsity check 0.3999999931880406 +time 138.04 +32 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 93971.7265625 +err_fin 89482.7890625 +sparsity check 0.3999999931880406 +time 135.73 +33 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 475604.625 +err_fin 369777.0625 +sparsity check 0.39998745918273926 +time 74.97 +33 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 136738.765625 +err_fin 116955.046875 +sparsity check 0.39999985694885254 +time 1.33 +33 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 83920.1875 +err_fin 76452.140625 +sparsity check 0.39999985694885254 +time 1.31 +33 self_attn.o_proj +Pruning ... +0.3873485326766968 0.08734855055809021 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3088480261314723 0.9709505944546686 1.0 +err_prefin 28934.658203125 +err_fin 15025.1640625 +sparsity check 0.3873485326766968 +time 67.87 +33 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1646083.75 +err_fin 1256310.375 +sparsity check 0.3999999931880406 +time 137.81 +33 mlp.up_proj +Pruning ... +0.39999318974358694 0.19997616112232208 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.13377813625084 0.9709505944546686 1.0 +err_prefin 1618582.0 +err_fin 1230377.625 +sparsity check 0.39999318974358694 +time 138.08 +33 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 107294.1640625 +err_fin 101588.65625 +sparsity check 0.3999999931880406 +time 135.70 +34 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 173598.65625 +err_fin 133135.75 +sparsity check 0.39998745918273926 +time 74.96 +34 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 72545.25 +err_fin 58444.1015625 +sparsity check 0.39999985694885254 +time 1.33 +34 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 50414.578125 +err_fin 45054.91015625 +sparsity check 0.39999985694885254 +time 1.32 +34 self_attn.o_proj +Pruning ... +0.390749529004097 0.09074954688549042 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.320257648229824 0.9709505944546686 1.0 +err_prefin 18316.734375 +err_fin 9629.263671875 +sparsity check 0.390749529004097 +time 67.87 +34 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1701501.0 +err_fin 1307156.5 +sparsity check 0.3999999931880406 +time 137.79 +34 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1672554.5 +err_fin 1279826.5 +sparsity check 0.3999999931880406 +time 138.04 +34 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 116514.109375 +err_fin 110948.59375 +sparsity check 0.3999999931880406 +time 135.70 +35 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 282674.09375 +err_fin 219742.546875 +sparsity check 0.39998745918273926 +time 74.96 +35 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91505.140625 +err_fin 75506.5625 +sparsity check 0.39999985694885254 +time 1.33 +35 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 68267.65625 +err_fin 62404.51953125 +sparsity check 0.39999985694885254 +time 1.32 +35 self_attn.o_proj +Pruning ... +0.37593840062618256 0.07593841850757599 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2689945890721086 0.9709505944546686 1.0 +err_prefin 21132.0 +err_fin 10591.478515625 +sparsity check 0.37593840062618256 +time 67.89 +35 mlp.gate_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1804963.75 +err_fin 1390331.75 +sparsity check 0.3999858626297542 +time 137.82 +35 mlp.up_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1771623.25 +err_fin 1359545.625 +sparsity check 0.3999858626297542 +time 138.07 +35 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 124995.6875 +err_fin 119226.5234375 +sparsity check 0.3999999931880406 +time 135.70 +36 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 270950.5 +err_fin 210602.453125 +sparsity check 0.39998745918273926 +time 74.96 +36 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 88066.09375 +err_fin 73971.8671875 +sparsity check 0.39999985694885254 +time 1.33 +36 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 66076.25 +err_fin 60804.046875 +sparsity check 0.39999985694885254 +time 1.31 +36 self_attn.o_proj +Pruning ... +0.38336338102817535 0.08336339890956879 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2952117289462595 0.9709505944546686 1.0 +err_prefin 14990.6181640625 +err_fin 7396.81787109375 +sparsity check 0.38336338102817535 +time 67.89 +36 mlp.gate_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1848811.25 +err_fin 1421905.375 +sparsity check 0.3999858626297542 +time 137.84 +36 mlp.up_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1815923.375 +err_fin 1391225.75 +sparsity check 0.3999858626297542 +time 138.00 +36 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 139403.25 +err_fin 132602.0 +sparsity check 0.3999999931880406 +time 135.73 +37 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 429569.09375 +err_fin 332502.1875 +sparsity check 0.39998745918273926 +time 74.97 +37 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 123268.078125 +err_fin 103549.9375 +sparsity check 0.39999985694885254 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 85408.6953125 +err_fin 78623.1171875 +sparsity check 0.39999985694885254 +time 1.30 +37 self_attn.o_proj +Pruning ... +0.3839271813631058 0.0839271992444992 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2971588429930576 0.9709505944546686 1.0 +err_prefin 31056.51171875 +err_fin 13986.1318359375 +sparsity check 0.3839271813631058 +time 67.89 +37 mlp.gate_proj +Pruning ... +0.3999858626297542 0.19995051622390747 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.133763479600881 0.9709505944546686 1.0 +err_prefin 1934857.0 +err_fin 1477883.0 +sparsity check 0.3999858626297542 +time 137.82 +37 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 1900476.25 +err_fin 1446482.0 +sparsity check 0.39999266607420786 +time 138.10 +37 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 153010.03125 +err_fin 145332.546875 +sparsity check 0.3999999931880406 +time 135.72 +38 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 480920.25 +err_fin 367853.8125 +sparsity check 0.39998745918273926 +time 74.98 +38 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 137635.5 +err_fin 111749.28125 +sparsity check 0.39999985694885254 +time 1.33 +38 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91433.40625 +err_fin 82317.390625 +sparsity check 0.39999985694885254 +time 1.31 +38 self_attn.o_proj +Pruning ... +0.38491450250148773 0.08491452038288116 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3005542614358636 0.9709505944546686 1.0 +err_prefin 53930.984375 +err_fin 24837.0546875 +sparsity check 0.38491450250148773 +time 67.91 +38 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 1996669.0 +err_fin 1521184.0 +sparsity check 0.39999266607420786 +time 137.80 +38 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 1965801.75 +err_fin 1491614.75 +sparsity check 0.39999266607420786 +time 138.05 +38 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 172013.5625 +err_fin 163074.984375 +sparsity check 0.3999999931880406 +time 135.74 +39 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 588931.125 +err_fin 449892.03125 +sparsity check 0.39998745918273926 +time 74.97 +39 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 169708.15625 +err_fin 144543.78125 +sparsity check 0.39999985694885254 +time 1.32 +39 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 99360.171875 +err_fin 89540.0 +sparsity check 0.39999985694885254 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.39136700332164764 0.09136702120304108 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3223072482351075 0.9709505944546686 1.0 +err_prefin 62931.125 +err_fin 28539.208984375 +sparsity check 0.39136700332164764 +time 67.88 +39 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2023570.5 +err_fin 1523428.0 +sparsity check 0.3999999931880406 +time 137.80 +39 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1995750.25 +err_fin 1497013.0 +sparsity check 0.3999999931880406 +time 138.07 +39 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 209161.71875 +err_fin 196051.59375 +sparsity check 0.3999999931880406 +time 135.72 +40 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 548435.0 +err_fin 412508.59375 +sparsity check 0.3999999761581421 +time 74.96 +40 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 148578.1875 +err_fin 126080.5234375 +sparsity check 0.39999985694885254 +time 1.32 +40 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 112805.53125 +err_fin 100186.0625 +sparsity check 0.39999985694885254 +time 1.31 +40 self_attn.o_proj +Pruning ... +0.3920653164386749 0.09206533432006836 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3246171971444998 0.9709505944546686 1.0 +err_prefin 90355.03125 +err_fin 47657.5546875 +sparsity check 0.3920653164386749 +time 67.89 +40 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2109784.0 +err_fin 1561325.0 +sparsity check 0.39999266607420786 +time 137.79 +40 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2077427.0 +err_fin 1531031.5 +sparsity check 0.39999266607420786 +time 138.02 +40 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 237795.90625 +err_fin 223116.65625 +sparsity check 0.3999999931880406 +time 135.74 +41 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 454291.125 +err_fin 336475.4375 +sparsity check 0.3999999761581421 +time 74.95 +41 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 128970.75 +err_fin 107213.53125 +sparsity check 0.39999985694885254 +time 1.33 +41 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 98359.6640625 +err_fin 87798.2109375 +sparsity check 0.39999985694885254 +time 1.32 +41 self_attn.o_proj +Pruning ... +0.38767126202583313 0.08767127990722656 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.30993959080064 0.9709505944546686 1.0 +err_prefin 88793.8515625 +err_fin 42478.0703125 +sparsity check 0.38767126202583313 +time 67.88 +41 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2213341.0 +err_fin 1599059.625 +sparsity check 0.39999266607420786 +time 137.77 +41 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2137321.75 +err_fin 1535685.0 +sparsity check 0.39999266607420786 +time 138.05 +41 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 277058.8125 +err_fin 257838.4375 +sparsity check 0.3999999931880406 +time 135.72 +42 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 519707.0625 +err_fin 379932.5625 +sparsity check 0.3999999761581421 +time 74.97 +42 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 132640.8125 +err_fin 109983.7734375 +sparsity check 0.39999985694885254 +time 1.33 +42 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 120522.5546875 +err_fin 108689.6484375 +sparsity check 0.39999985694885254 +time 1.30 +42 self_attn.o_proj +Pruning ... +0.38890962302684784 0.08890964090824127 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3141107083100954 0.9709505944546686 1.0 +err_prefin 78469.5859375 +err_fin 39123.4453125 +sparsity check 0.38890962302684784 +time 67.87 +42 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2396299.5 +err_fin 1715538.75 +sparsity check 0.39999266607420786 +time 137.78 +42 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2258508.0 +err_fin 1608224.0 +sparsity check 0.39999266607420786 +time 138.04 +42 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 300451.25 +err_fin 279095.375 +sparsity check 0.3999999931880406 +time 135.72 +43 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 361255.3125 +err_fin 262990.71875 +sparsity check 0.3999999761581421 +time 74.96 +43 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 117103.0234375 +err_fin 96359.265625 +sparsity check 0.39999985694885254 +time 1.34 +43 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 96491.140625 +err_fin 84371.9375 +sparsity check 0.39999985694885254 +time 1.30 +43 self_attn.o_proj +Pruning ... +0.38483870029449463 0.08483871817588806 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3002942199017686 0.9709505944546686 1.0 +err_prefin 58077.65625 +err_fin 31061.318359375 +sparsity check 0.38483870029449463 +time 67.84 +43 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2494903.0 +err_fin 1775033.75 +sparsity check 0.3999999931880406 +time 137.80 +43 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2325850.5 +err_fin 1646176.75 +sparsity check 0.3999999931880406 +time 137.95 +43 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 330748.3125 +err_fin 306762.3125 +sparsity check 0.3999999931880406 +time 135.70 +44 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 630344.125 +err_fin 458141.21875 +sparsity check 0.3999999761581421 +time 74.96 +44 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 131598.953125 +err_fin 111503.53125 +sparsity check 0.39999985694885254 +time 1.32 +44 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 148579.09375 +err_fin 134851.46875 +sparsity check 0.39999985694885254 +time 1.33 +44 self_attn.o_proj +Pruning ... +0.39769303798675537 0.0976930558681488 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3429306804428647 0.9709505944546686 1.0 +err_prefin 156206.703125 +err_fin 83971.65625 +sparsity check 0.39769303798675537 +time 67.85 +44 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2738918.75 +err_fin 1918952.75 +sparsity check 0.39999266607420786 +time 137.78 +44 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2463815.5 +err_fin 1714715.75 +sparsity check 0.39999266607420786 +time 138.15 +44 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 383154.3125 +err_fin 350069.21875 +sparsity check 0.3999999931880406 +time 135.36 +45 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 411766.8125 +err_fin 293813.1875 +sparsity check 0.39998745918273926 +time 74.93 +45 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 122174.71875 +err_fin 101486.4609375 +sparsity check 0.39999985694885254 +time 1.35 +45 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 137134.6875 +err_fin 124872.6953125 +sparsity check 0.39999985694885254 +time 1.32 +45 self_attn.o_proj +Pruning ... +0.3968270719051361 0.09682708978652954 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.340147122923055 0.9709505944546686 1.0 +err_prefin 41803.109375 +err_fin 22890.818359375 +sparsity check 0.3968270719051361 +time 67.80 +45 mlp.gate_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2962994.5 +err_fin 2079594.0 +sparsity check 0.39999266607420786 +time 137.80 +45 mlp.up_proj +Pruning ... +0.39998556886400494 0.1999494880437851 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337628919369338 0.9709505944546686 1.0 +err_prefin 2630788.25 +err_fin 1833110.0 +sparsity check 0.39998556886400494 +time 137.97 +45 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 393639.1875 +err_fin 360773.9375 +sparsity check 0.3999999931880406 +time 135.69 +46 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 343920.46875 +err_fin 246327.15625 +sparsity check 0.39998745918273926 +time 74.95 +46 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 98745.546875 +err_fin 84167.328125 +sparsity check 0.39999985694885254 +time 1.35 +46 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 123088.5 +err_fin 112754.875 +sparsity check 0.39999985694885254 +time 1.31 +46 self_attn.o_proj +Pruning ... +0.3924602270126343 0.09246024489402771 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3259197918896706 0.9709505944546686 1.0 +err_prefin 57249.734375 +err_fin 31896.0 +sparsity check 0.3924602270126343 +time 67.91 +46 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3174013.75 +err_fin 2242079.0 +sparsity check 0.3999999931880406 +time 137.79 +46 mlp.up_proj +Pruning ... +0.39999266607420786 0.19997432827949524 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337770887951855 0.9709505944546686 1.0 +err_prefin 2772648.5 +err_fin 1945844.125 +sparsity check 0.39999266607420786 +time 137.99 +46 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 395180.625 +err_fin 364649.0625 +sparsity check 0.3999999931880406 +time 135.70 +47 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 468856.0625 +err_fin 337035.125 +sparsity check 0.3999999761581421 +time 74.95 +47 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 138887.5625 +err_fin 116853.6171875 +sparsity check 0.39999985694885254 +time 1.34 +47 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 114607.671875 +err_fin 104111.078125 +sparsity check 0.39999985694885254 +time 1.30 +47 self_attn.o_proj +Pruning ... +0.39365212619304657 0.09365214407444 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3298350005144441 0.9709505944546686 1.0 +err_prefin 96371.875 +err_fin 42798.21875 +sparsity check 0.39365212619304657 +time 67.85 +47 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3290644.75 +err_fin 2296471.5 +sparsity check 0.3999999931880406 +time 137.79 +47 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2834067.5 +err_fin 1964680.125 +sparsity check 0.3999999931880406 +time 137.97 +47 mlp.down_proj +Pruning ... +0.399999988930566 0.19999997317790985 0.34285713945116314 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917398757333 0.9709505944546686 1.0 +err_prefin 443383.6875 +err_fin 404481.4375 +sparsity check 0.399999988930566 +time 135.68 +48 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 238758.171875 +err_fin 169874.4375 +sparsity check 0.39998745918273926 +time 74.96 +48 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 67367.6953125 +err_fin 56036.6953125 +sparsity check 0.39999985694885254 +time 1.34 +48 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 115441.171875 +err_fin 105516.4609375 +sparsity check 0.39999985694885254 +time 1.31 +48 self_attn.o_proj +Pruning ... +0.3911074250936508 0.09110744297504425 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.32144643241679 0.9709505944546686 1.0 +err_prefin 56382.0859375 +err_fin 30507.6953125 +sparsity check 0.3911074250936508 +time 67.86 +48 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3435194.75 +err_fin 2387344.75 +sparsity check 0.3999999931880406 +time 137.81 +48 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2945057.5 +err_fin 2032755.5 +sparsity check 0.3999999931880406 +time 138.02 +48 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 443090.375 +err_fin 406116.25 +sparsity check 0.3999999931880406 +time 135.35 +49 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 139943.1875 +err_fin 99371.890625 +sparsity check 0.39998745918273926 +time 74.97 +49 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 54727.9921875 +err_fin 45486.75390625 +sparsity check 0.39999985694885254 +time 1.33 +49 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 92321.65625 +err_fin 82783.8984375 +sparsity check 0.39999985694885254 +time 1.32 +49 self_attn.o_proj +Pruning ... +0.3994888663291931 0.09948888421058655 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3486641752370372 0.9709505944546686 1.0 +err_prefin 24136.89453125 +err_fin 11916.1640625 +sparsity check 0.3994888663291931 +time 67.85 +49 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3564298.75 +err_fin 2477942.5 +sparsity check 0.3999999931880406 +time 137.82 +49 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3051650.25 +err_fin 2108091.5 +sparsity check 0.3999999931880406 +time 137.99 +49 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 444759.9375 +err_fin 409438.4375 +sparsity check 0.3999999931880406 +time 135.74 +50 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 210704.421875 +err_fin 149945.28125 +sparsity check 0.39998745918273926 +time 74.96 +50 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 60009.5078125 +err_fin 50233.8203125 +sparsity check 0.39999985694885254 +time 1.33 +50 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 118265.875 +err_fin 106615.8359375 +sparsity check 0.39999985694885254 +time 1.32 +50 self_attn.o_proj +Pruning ... +0.3945927917957306 0.09459280967712402 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.332907882956055 0.9709505944546686 1.0 +err_prefin 41071.29296875 +err_fin 22746.974609375 +sparsity check 0.3945927917957306 +time 67.86 +50 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3691318.5 +err_fin 2572185.5 +sparsity check 0.3999999931880406 +time 137.79 +50 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3151099.25 +err_fin 2181640.75 +sparsity check 0.3999999931880406 +time 138.17 +50 mlp.down_proj +Pruning ... +0.399999988930566 0.19999997317790985 0.34285713945116314 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917398757333 0.9709505944546686 1.0 +err_prefin 447227.28125 +err_fin 412591.90625 +sparsity check 0.399999988930566 +time 135.35 +51 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 271350.0625 +err_fin 193069.9375 +sparsity check 0.39998745918273926 +time 74.97 +51 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 82881.09375 +err_fin 70391.65625 +sparsity check 0.39999985694885254 +time 1.34 +51 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 124578.8125 +err_fin 112905.328125 +sparsity check 0.39999985694885254 +time 1.33 +51 self_attn.o_proj +Pruning ... +0.3849567323923111 0.08495675027370453 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.300699085947819 0.9709505944546686 1.0 +err_prefin 42985.53515625 +err_fin 21869.13671875 +sparsity check 0.3849567323923111 +time 67.89 +51 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3787472.5 +err_fin 2648911.75 +sparsity check 0.3999999931880406 +time 137.81 +51 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3242460.0 +err_fin 2253813.0 +sparsity check 0.3999999931880406 +time 138.05 +51 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 449872.78125 +err_fin 415985.8125 +sparsity check 0.3999999931880406 +time 135.71 +52 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 439324.875 +err_fin 315666.40625 +sparsity check 0.39998745918273926 +time 74.97 +52 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 95802.5 +err_fin 82492.7421875 +sparsity check 0.39999985694885254 +time 1.33 +52 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 146872.84375 +err_fin 133930.578125 +sparsity check 0.39999985694885254 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.38917115330696106 0.08917117118835449 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3149881046089533 0.9709505944546686 1.0 +err_prefin 87225.6015625 +err_fin 43163.53125 +sparsity check 0.38917115330696106 +time 67.88 +52 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3871781.0 +err_fin 2721833.75 +sparsity check 0.3999999931880406 +time 137.91 +52 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3309534.75 +err_fin 2312042.0 +sparsity check 0.3999999931880406 +time 138.23 +52 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 457674.1875 +err_fin 423815.9375 +sparsity check 0.3999999931880406 +time 135.43 +53 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 147558.15625 +err_fin 106016.625 +sparsity check 0.39998745918273926 +time 74.97 +53 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 43785.1484375 +err_fin 36865.515625 +sparsity check 0.39999985694885254 +time 1.35 +53 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 115249.484375 +err_fin 105056.921875 +sparsity check 0.39999985694885254 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.39926575124263763 0.09926576912403107 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3479546838848155 0.9709505944546686 1.0 +err_prefin 30559.916015625 +err_fin 15811.552734375 +sparsity check 0.39926575124263763 +time 67.89 +53 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3969673.25 +err_fin 2786505.5 +sparsity check 0.3999999931880406 +time 137.94 +53 mlp.up_proj +Pruning ... +0.3999928959778377 0.1999751329421997 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337775486548314 0.9709505944546686 1.0 +err_prefin 3400897.5 +err_fin 2372340.5 +sparsity check 0.3999928959778377 +time 138.10 +53 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 463003.625 +err_fin 429105.5625 +sparsity check 0.3999999931880406 +time 135.72 +54 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 173454.875 +err_fin 124175.578125 +sparsity check 0.39998745918273926 +time 74.98 +54 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 49409.3046875 +err_fin 41089.84375 +sparsity check 0.39999985694885254 +time 1.36 +54 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 118842.3828125 +err_fin 108371.765625 +sparsity check 0.39999985694885254 +time 1.32 +54 self_attn.o_proj +Pruning ... +0.39291320741176605 0.09291322529315948 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3274106262053968 0.9709505944546686 1.0 +err_prefin 36517.9765625 +err_fin 18781.7265625 +sparsity check 0.39291320741176605 +time 67.88 +54 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4057488.0 +err_fin 2857511.75 +sparsity check 0.3999999931880406 +time 137.92 +54 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3488466.5 +err_fin 2440393.25 +sparsity check 0.3999999931880406 +time 138.12 +54 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 463141.15625 +err_fin 430254.9375 +sparsity check 0.3999999931880406 +time 135.75 +55 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 276099.8125 +err_fin 199511.765625 +sparsity check 0.39998745918273926 +time 74.98 +55 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 79841.265625 +err_fin 67909.1484375 +sparsity check 0.39999985694885254 +time 1.34 +55 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 154509.328125 +err_fin 141143.03125 +sparsity check 0.39999985694885254 +time 1.30 +55 self_attn.o_proj +Pruning ... +0.3909609913825989 0.09096100926399231 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.32096030981266 0.9709505944546686 1.0 +err_prefin 64654.1015625 +err_fin 33504.8125 +sparsity check 0.3909609913825989 +time 67.88 +55 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4126911.25 +err_fin 2913544.0 +sparsity check 0.3999999931880406 +time 137.96 +55 mlp.up_proj +Pruning ... +0.3999931343964168 0.19997596740722656 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337780255445538 0.9709505944546686 1.0 +err_prefin 3576076.0 +err_fin 2509234.0 +sparsity check 0.3999931343964168 +time 138.18 +55 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 473575.40625 +err_fin 441082.65625 +sparsity check 0.3999999931880406 +time 135.51 +56 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 201423.84375 +err_fin 145764.46875 +sparsity check 0.39998745918273926 +time 74.97 +56 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 64095.1484375 +err_fin 53721.40625 +sparsity check 0.39999985694885254 +time 1.34 +56 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 108391.2578125 +err_fin 98795.03125 +sparsity check 0.39999985694885254 +time 1.31 +56 self_attn.o_proj +Pruning ... +0.394983634352684 0.09498365223407745 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3341802617171317 0.9709505944546686 1.0 +err_prefin 35854.6640625 +err_fin 17304.00390625 +sparsity check 0.394983634352684 +time 67.91 +56 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4181935.75 +err_fin 2950946.75 +sparsity check 0.3999999931880406 +time 137.85 +56 mlp.up_proj +Pruning ... +0.3999931343964168 0.19997596740722656 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337780255445538 0.9709505944546686 1.0 +err_prefin 3632633.0 +err_fin 2548105.5 +sparsity check 0.3999931343964168 +time 138.07 +56 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 479367.90625 +err_fin 446711.625 +sparsity check 0.3999999931880406 +time 135.40 +57 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 193574.265625 +err_fin 139194.71875 +sparsity check 0.39998745918273926 +time 74.96 +57 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 59586.9765625 +err_fin 50896.7578125 +sparsity check 0.39999985694885254 +time 1.34 +57 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 140319.21875 +err_fin 128478.203125 +sparsity check 0.39999985694885254 +time 1.31 +57 self_attn.o_proj +Pruning ... +0.3990766406059265 0.09907665848731995 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3473526955921247 0.9709505944546686 1.0 +err_prefin 37232.3046875 +err_fin 19707.89453125 +sparsity check 0.3990766406059265 +time 67.86 +57 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4315325.0 +err_fin 3052128.5 +sparsity check 0.3999999931880406 +time 137.92 +57 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3757622.5 +err_fin 2641883.0 +sparsity check 0.3999999931880406 +time 138.13 +57 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 488616.46875 +err_fin 456110.0625 +sparsity check 0.3999999931880406 +time 135.46 +58 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 109906.765625 +err_fin 79257.90625 +sparsity check 0.3999880701303482 +time 74.97 +58 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 36524.1015625 +err_fin 29866.85546875 +sparsity check 0.39999985694885254 +time 1.36 +58 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 101192.5234375 +err_fin 91440.578125 +sparsity check 0.39999985694885254 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.3996235728263855 0.09962359070777893 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.349092144873774 0.9709505944546686 1.0 +err_prefin 27671.78125 +err_fin 14613.060546875 +sparsity check 0.3996235728263855 +time 67.84 +58 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4399088.0 +err_fin 3121391.5 +sparsity check 0.3999999931880406 +time 137.91 +58 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3839110.5 +err_fin 2708475.5 +sparsity check 0.3999999931880406 +time 138.10 +58 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 492001.0 +err_fin 459838.25 +sparsity check 0.3999999931880406 +time 135.44 +59 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 140025.5 +err_fin 100983.1171875 +sparsity check 0.39998745918273926 +time 74.99 +59 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 42919.84375 +err_fin 36084.875 +sparsity check 0.39999985694885254 +time 1.35 +59 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 112994.859375 +err_fin 101844.03125 +sparsity check 0.39999985694885254 +time 1.33 +59 self_attn.o_proj +Pruning ... +0.3997947573661804 0.09979477524757385 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3496355861145926 0.9709505944546686 1.0 +err_prefin 29219.421875 +err_fin 14662.58984375 +sparsity check 0.3997947573661804 +time 67.83 +59 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4492010.0 +err_fin 3196863.75 +sparsity check 0.3999999931880406 +time 137.86 +59 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3931143.25 +err_fin 2782485.0 +sparsity check 0.3999999931880406 +time 138.13 +59 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 492903.5625 +err_fin 462014.5 +sparsity check 0.3999999931880406 +time 135.44 +60 self_attn.q_proj +Pruning ... +0.39995187520980835 0.09995189309120178 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.350133957117459 0.9709505944546686 1.0 +err_prefin 24846.77734375 +err_fin 17901.5859375 +sparsity check 0.39995187520980835 +time 74.97 +60 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 9290.47265625 +err_fin 7612.259765625 +sparsity check 0.39999985694885254 +time 1.33 +60 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 59974.8984375 +err_fin 54110.11328125 +sparsity check 0.39999985694885254 +time 1.31 +60 self_attn.o_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 17134.6015625 +err_fin 9210.818359375 +sparsity check 0.3999999761581421 +time 67.82 +60 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4548430.0 +err_fin 3252469.75 +sparsity check 0.3999999931880406 +time 137.90 +60 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4000529.25 +err_fin 2845059.5 +sparsity check 0.3999999931880406 +time 138.32 +60 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 489890.625 +err_fin 460278.625 +sparsity check 0.3999999931880406 +time 135.45 +61 self_attn.q_proj +Pruning ... +0.39996403455734253 0.09996405243873596 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3501725095304327 0.9709505944546686 1.0 +err_prefin 90896.8515625 +err_fin 66454.25 +sparsity check 0.39996403455734253 +time 74.98 +61 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 30812.04296875 +err_fin 25593.125 +sparsity check 0.39999985694885254 +time 1.33 +61 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 102036.171875 +err_fin 93325.7578125 +sparsity check 0.39999985694885254 +time 1.31 +61 self_attn.o_proj +Pruning ... +0.39953969419002533 0.09953971207141876 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.348825692335962 0.9709505944546686 1.0 +err_prefin 25333.818359375 +err_fin 13513.7431640625 +sparsity check 0.39953969419002533 +time 67.81 +61 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4626365.0 +err_fin 3314594.0 +sparsity check 0.3999999931880406 +time 137.87 +61 mlp.up_proj +Pruning ... +0.3999931343964168 0.19997596740722656 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337780255445538 0.9709505944546686 1.0 +err_prefin 4089394.75 +err_fin 2914332.5 +sparsity check 0.3999931343964168 +time 138.09 +61 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 492596.8125 +err_fin 463564.75 +sparsity check 0.3999999931880406 +time 135.76 +62 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 99086.1953125 +err_fin 72139.265625 +sparsity check 0.3999880701303482 +time 74.97 +62 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 34142.8046875 +err_fin 28503.0 +sparsity check 0.399999737739563 +time 1.34 +62 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 112070.03125 +err_fin 101395.1171875 +sparsity check 0.39999985694885254 +time 1.31 +62 self_attn.o_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 23855.984375 +err_fin 11415.373046875 +sparsity check 0.3999999761581421 +time 67.82 +62 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4720747.0 +err_fin 3396094.0 +sparsity check 0.3999999931880406 +time 137.92 +62 mlp.up_proj +Pruning ... +0.3999931343964168 0.19997596740722656 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337780255445538 0.9709505944546686 1.0 +err_prefin 4193333.75 +err_fin 3000677.5 +sparsity check 0.3999931343964168 +time 138.15 +62 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 498259.875 +err_fin 469256.65625 +sparsity check 0.3999999931880406 +time 135.88 +63 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 85960.1640625 +err_fin 63232.09765625 +sparsity check 0.39998745918273926 +time 74.95 +63 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 26819.83203125 +err_fin 22145.341796875 +sparsity check 0.39999985694885254 +time 1.36 +63 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 89955.7734375 +err_fin 80309.4609375 +sparsity check 0.39999985694885254 +time 1.32 +63 self_attn.o_proj +Pruning ... +0.3984784632921219 0.09847848117351532 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.345444731275777 0.9709505944546686 1.0 +err_prefin 17989.068359375 +err_fin 9419.4921875 +sparsity check 0.3984784632921219 +time 67.82 +63 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4814154.0 +err_fin 3475047.5 +sparsity check 0.3999999931880406 +time 137.90 +63 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4284651.5 +err_fin 3079046.75 +sparsity check 0.3999999931880406 +time 138.06 +63 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 504413.34375 +err_fin 475615.25 +sparsity check 0.3999999931880406 +time 135.35 +64 self_attn.q_proj +Pruning ... +0.3999755531549454 0.0999755710363388 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502090281957484 0.9709505944546686 1.0 +err_prefin 159849.96875 +err_fin 117469.234375 +sparsity check 0.3999755531549454 +time 74.95 +64 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 51413.63671875 +err_fin 42604.359375 +sparsity check 0.39999985694885254 +time 1.35 +64 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 124866.046875 +err_fin 114163.296875 +sparsity check 0.39999985694885254 +time 1.32 +64 self_attn.o_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 35489.97265625 +err_fin 16909.37109375 +sparsity check 0.3999999761581421 +time 67.82 +64 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4902354.0 +err_fin 3547474.5 +sparsity check 0.3999999931880406 +time 137.84 +64 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4380570.5 +err_fin 3154890.5 +sparsity check 0.3999999931880406 +time 138.05 +64 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 512836.90625 +err_fin 483761.8125 +sparsity check 0.3999999931880406 +time 135.35 +65 self_attn.q_proj +Pruning ... +0.39996445178985596 0.09996446967124939 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.350173832365276 0.9709505944546686 1.0 +err_prefin 45725.81640625 +err_fin 33557.125 +sparsity check 0.39996445178985596 +time 74.94 +65 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 15090.453125 +err_fin 12062.2275390625 +sparsity check 0.39999985694885254 +time 1.33 +65 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 69568.328125 +err_fin 62171.89453125 +sparsity check 0.39999985694885254 +time 1.31 +65 self_attn.o_proj +Pruning ... +0.3986722081899643 0.09867222607135773 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3460633409565008 0.9709505944546686 1.0 +err_prefin 16426.189453125 +err_fin 8142.294921875 +sparsity check 0.3986722081899643 +time 67.88 +65 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4979698.5 +err_fin 3616654.5 +sparsity check 0.3999999931880406 +time 137.92 +65 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4469581.0 +err_fin 3233134.0 +sparsity check 0.3999999931880406 +time 138.15 +65 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 521956.0 +err_fin 492626.28125 +sparsity check 0.3999999931880406 +time 135.46 +66 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 82390.8046875 +err_fin 60246.546875 +sparsity check 0.3999880701303482 +time 74.96 +66 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 27396.1171875 +err_fin 22218.01171875 +sparsity check 0.39999985694885254 +time 1.32 +66 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 91186.625 +err_fin 83134.8125 +sparsity check 0.39999985694885254 +time 1.30 +66 self_attn.o_proj +Pruning ... +0.39990170300006866 0.0999017208814621 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3499748560894689 0.9709505944546686 1.0 +err_prefin 27952.85546875 +err_fin 15630.4541015625 +sparsity check 0.39990170300006866 +time 67.81 +66 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5090460.0 +err_fin 3697180.5 +sparsity check 0.3999999931880406 +time 137.88 +66 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4601931.0 +err_fin 3329092.75 +sparsity check 0.3999999931880406 +time 138.24 +66 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 545843.5 +err_fin 515164.53125 +sparsity check 0.3999999931880406 +time 135.36 +67 self_attn.q_proj +Pruning ... +0.3999880701303482 0.09998808801174164 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502487097151674 0.9709505944546686 1.0 +err_prefin 35176.55078125 +err_fin 25813.625 +sparsity check 0.3999880701303482 +time 74.97 +67 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 11708.529296875 +err_fin 8978.494140625 +sparsity check 0.39999985694885254 +time 1.35 +67 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 38922.94140625 +err_fin 33378.15625 +sparsity check 0.39999985694885254 +time 1.31 +67 self_attn.o_proj +Pruning ... +0.3936278820037842 0.09362789988517761 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3297556035346028 0.9709505944546686 1.0 +err_prefin 9967.0458984375 +err_fin 4096.6826171875 +sparsity check 0.3936278820037842 +time 67.86 +67 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5132012.5 +err_fin 3736780.0 +sparsity check 0.3999999931880406 +time 137.93 +67 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4676647.0 +err_fin 3389882.0 +sparsity check 0.3999999931880406 +time 138.14 +67 mlp.down_proj +Pruning ... +0.399999988930566 0.19999997317790985 0.34285713945116314 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917398757333 0.9709505944546686 1.0 +err_prefin 549749.75 +err_fin 518622.03125 +sparsity check 0.399999988930566 +time 135.45 +68 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 292117.0 +err_fin 215035.8125 +sparsity check 0.39998745918273926 +time 74.96 +68 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 88142.25 +err_fin 74237.4296875 +sparsity check 0.39999985694885254 +time 1.35 +68 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 180795.3125 +err_fin 166177.03125 +sparsity check 0.39999985694885254 +time 1.31 +68 self_attn.o_proj +Pruning ... +0.39890041947364807 0.0989004373550415 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.346791217688506 0.9709505944546686 1.0 +err_prefin 28715.28125 +err_fin 15530.28125 +sparsity check 0.39890041947364807 +time 67.83 +68 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5310609.5 +err_fin 3873440.0 +sparsity check 0.3999999931880406 +time 137.78 +68 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4870220.0 +err_fin 3537587.5 +sparsity check 0.3999999931880406 +time 138.03 +68 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 587633.875 +err_fin 553468.3125 +sparsity check 0.3999999931880406 +time 135.36 +69 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 483384.78125 +err_fin 357346.09375 +sparsity check 0.39998745918273926 +time 74.96 +69 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 158684.90625 +err_fin 134880.53125 +sparsity check 0.39999985694885254 +time 1.34 +69 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 241631.8125 +err_fin 220124.5625 +sparsity check 0.39999985694885254 +time 1.32 +69 self_attn.o_proj +Pruning ... +0.3949231803417206 0.09492319822311401 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3339836229488613 0.9709505944546686 1.0 +err_prefin 54212.2265625 +err_fin 26918.04296875 +sparsity check 0.3949231803417206 +time 67.85 +69 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5407849.0 +err_fin 3944897.0 +sparsity check 0.3999999931880406 +time 137.88 +69 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5016751.0 +err_fin 3643644.5 +sparsity check 0.3999999931880406 +time 138.15 +69 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 617275.625 +err_fin 581917.125 +sparsity check 0.3999999931880406 +time 135.40 +70 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 282138.375 +err_fin 207488.1875 +sparsity check 0.39998745918273926 +time 74.96 +70 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 89395.515625 +err_fin 73210.3203125 +sparsity check 0.39999985694885254 +time 1.33 +70 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 143147.21875 +err_fin 129311.0 +sparsity check 0.39999985694885254 +time 1.31 +70 self_attn.o_proj +Pruning ... +0.39024436473846436 0.09024438261985779 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3185758861734032 0.9709505944546686 1.0 +err_prefin 43651.9140625 +err_fin 20341.4765625 +sparsity check 0.39024436473846436 +time 67.87 +70 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5526235.0 +err_fin 4027965.75 +sparsity check 0.3999999931880406 +time 137.82 +70 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5155715.5 +err_fin 3744852.0 +sparsity check 0.3999999931880406 +time 138.02 +70 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 659116.25 +err_fin 620709.125 +sparsity check 0.3999999931880406 +time 135.35 +71 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 368707.46875 +err_fin 273084.25 +sparsity check 0.39998745918273926 +time 74.94 +71 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 119726.71875 +err_fin 99347.046875 +sparsity check 0.39999985694885254 +time 1.32 +71 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 149877.203125 +err_fin 136371.9375 +sparsity check 0.39999985694885254 +time 1.32 +71 self_attn.o_proj +Pruning ... +0.3951091170310974 0.09510913491249084 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3345882234903663 0.9709505944546686 1.0 +err_prefin 56630.87890625 +err_fin 26684.126953125 +sparsity check 0.3951091170310974 +time 67.87 +71 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5691556.5 +err_fin 4140972.5 +sparsity check 0.3999999931880406 +time 137.80 +71 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5325007.0 +err_fin 3861668.25 +sparsity check 0.3999999931880406 +time 138.01 +71 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 695043.375 +err_fin 653343.375 +sparsity check 0.3999999931880406 +time 135.77 +72 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 509801.3125 +err_fin 376212.1875 +sparsity check 0.39998745918273926 +time 74.95 +72 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 150016.125 +err_fin 128747.53125 +sparsity check 0.39999985694885254 +time 1.34 +72 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 172384.1875 +err_fin 157458.328125 +sparsity check 0.39999985694885254 +time 1.32 +72 self_attn.o_proj +Pruning ... +0.39744168519973755 0.09744170308113098 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3421240016705787 0.9709505944546686 1.0 +err_prefin 70598.375 +err_fin 38307.6796875 +sparsity check 0.39744168519973755 +time 67.79 +72 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5828300.0 +err_fin 4228134.5 +sparsity check 0.3999999931880406 +time 137.79 +72 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5491108.0 +err_fin 3969415.5 +sparsity check 0.3999999931880406 +time 138.18 +72 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 746648.0625 +err_fin 700877.0 +sparsity check 0.3999999931880406 +time 135.37 +73 self_attn.q_proj +Pruning ... +0.39998745918273926 0.09998747706413269 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502467729374095 0.9709505944546686 1.0 +err_prefin 477676.0625 +err_fin 350389.125 +sparsity check 0.39998745918273926 +time 74.97 +73 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 143966.09375 +err_fin 122261.875 +sparsity check 0.39999985694885254 +time 1.35 +73 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 200310.625 +err_fin 179147.703125 +sparsity check 0.39999985694885254 +time 1.32 +73 self_attn.o_proj +Pruning ... +0.3994404375553131 0.09944045543670654 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3485102430804012 0.9709505944546686 1.0 +err_prefin 46256.8046875 +err_fin 24534.345703125 +sparsity check 0.3994404375553131 +time 67.84 +73 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5971108.5 +err_fin 4316203.0 +sparsity check 0.3999999931880406 +time 137.78 +73 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5658571.5 +err_fin 4075587.0 +sparsity check 0.3999999931880406 +time 138.01 +73 mlp.down_proj +Pruning ... +0.399999988930566 0.19999997317790985 0.34285713945116314 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917398757333 0.9709505944546686 1.0 +err_prefin 821471.8125 +err_fin 766429.5 +sparsity check 0.399999988930566 +time 135.77 +74 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 441981.6875 +err_fin 319593.8125 +sparsity check 0.3999999761581421 +time 74.99 +74 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 131631.90625 +err_fin 108456.46875 +sparsity check 0.39999985694885254 +time 1.36 +74 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 152674.359375 +err_fin 135078.734375 +sparsity check 0.39999985694885254 +time 1.32 +74 self_attn.o_proj +Pruning ... +0.39621762931346893 0.09621764719486237 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3381807222541005 0.9709505944546686 1.0 +err_prefin 98191.1640625 +err_fin 43221.46875 +sparsity check 0.39621762931346893 +time 67.83 +74 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 6117716.5 +err_fin 4367414.0 +sparsity check 0.3999999931880406 +time 137.78 +74 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5819283.0 +err_fin 4138981.0 +sparsity check 0.3999999931880406 +time 138.19 +74 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 906995.0 +err_fin 841319.5625 +sparsity check 0.3999999931880406 +time 135.34 +75 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 446136.75 +err_fin 319102.375 +sparsity check 0.3999999761581421 +time 74.99 +75 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 127815.8515625 +err_fin 102692.234375 +sparsity check 0.39999985694885254 +time 1.32 +75 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 181831.625 +err_fin 157889.078125 +sparsity check 0.39999985694885254 +time 1.31 +75 self_attn.o_proj +Pruning ... +0.3955359160900116 0.09553593397140503 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3359738332557134 0.9709505944546686 1.0 +err_prefin 91375.703125 +err_fin 40776.328125 +sparsity check 0.3955359160900116 +time 67.87 +75 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 6186535.0 +err_fin 4373899.0 +sparsity check 0.3999999931880406 +time 137.78 +75 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5882684.0 +err_fin 4144648.0 +sparsity check 0.3999999931880406 +time 138.19 +75 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1023284.6875 +err_fin 936964.875 +sparsity check 0.3999999931880406 +time 135.35 +76 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 681006.0625 +err_fin 474383.15625 +sparsity check 0.3999999761581421 +time 74.95 +76 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 187218.53125 +err_fin 149162.0625 +sparsity check 0.39999985694885254 +time 1.32 +76 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 274581.15625 +err_fin 235705.09375 +sparsity check 0.39999985694885254 +time 1.30 +76 self_attn.o_proj +Pruning ... +0.38502632081508636 0.0850263386964798 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3009376624713092 0.9709505944546686 1.0 +err_prefin 303418.03125 +err_fin 155106.640625 +sparsity check 0.38502632081508636 +time 67.87 +76 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 6181025.5 +err_fin 4269697.0 +sparsity check 0.3999999931880406 +time 137.79 +76 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5839910.0 +err_fin 4014858.0 +sparsity check 0.3999999931880406 +time 138.07 +76 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1254402.5 +err_fin 1118266.625 +sparsity check 0.3999999931880406 +time 135.33 +77 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 460530.625 +err_fin 307653.4375 +sparsity check 0.3999999761581421 +time 74.97 +77 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 136339.171875 +err_fin 103951.3203125 +sparsity check 0.39999985694885254 +time 1.32 +77 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 187703.84375 +err_fin 158275.296875 +sparsity check 0.39999985694885254 +time 1.33 +77 self_attn.o_proj +Pruning ... +0.3909844905138016 0.09098450839519501 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3210383461275814 0.9709505944546686 1.0 +err_prefin 134321.203125 +err_fin 50604.8671875 +sparsity check 0.3909844905138016 +time 67.90 +77 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5808654.0 +err_fin 3888417.5 +sparsity check 0.3999999931880406 +time 137.84 +77 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 5495126.0 +err_fin 3658363.25 +sparsity check 0.3999999931880406 +time 138.21 +77 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1474809.0 +err_fin 1266817.5 +sparsity check 0.3999999931880406 +time 135.37 +78 self_attn.q_proj +Pruning ... +0.3999999761581421 0.09999999403953552 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502864520676674 0.9709505944546686 1.0 +err_prefin 445713.0 +err_fin 273131.4375 +sparsity check 0.3999999761581421 +time 74.95 +78 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 141628.875 +err_fin 101864.96875 +sparsity check 0.39999985694885254 +time 1.33 +78 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 162669.1875 +err_fin 140059.9375 +sparsity check 0.39999985694885254 +time 1.30 +78 self_attn.o_proj +Pruning ... +0.397164523601532 0.09716454148292542 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3412332918506629 0.9709505944546686 1.0 +err_prefin 93672.1171875 +err_fin 37403.00390625 +sparsity check 0.397164523601532 +time 67.88 +78 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4919987.0 +err_fin 3151665.5 +sparsity check 0.3999999931880406 +time 137.80 +78 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 4608632.0 +err_fin 2935308.5 +sparsity check 0.3999999931880406 +time 138.19 +78 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1597177.75 +err_fin 1275748.25 +sparsity check 0.3999999931880406 +time 135.34 +79 self_attn.q_proj +Pruning ... +0.39998845756053925 0.09998847544193268 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.3502499379125963 0.9709505944546686 1.0 +err_prefin 290667.125 +err_fin 163923.90625 +sparsity check 0.39998845756053925 +time 74.94 +79 self_attn.k_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 105166.34375 +err_fin 75287.1015625 +sparsity check 0.39999985694885254 +time 1.32 +79 self_attn.v_proj +Pruning ... +0.39999985694885254 0.19999980926513672 0.37499988079071045 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0446748792489604 0.9709505944546686 1.0 +err_prefin 74843.890625 +err_fin 59122.14453125 +sparsity check 0.39999985694885254 +time 1.30 +79 self_attn.o_proj +Pruning ... +0.3843609243631363 0.08436094224452972 0.29999998211860657 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2986527357470568 0.9709505944546686 1.0 +err_prefin 27899.15234375 +err_fin 7628.357421875 +sparsity check 0.3843609243631363 +time 67.88 +79 mlp.gate_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 3068756.0 +err_fin 1793884.75 +sparsity check 0.3999999931880406 +time 137.82 +79 mlp.up_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 2823493.25 +err_fin 1643121.375 +sparsity check 0.3999999931880406 +time 138.16 +79 mlp.down_proj +Pruning ... +0.3999999931880406 0.19999997317790985 0.3428571437086378 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1337917438717968 0.9709505944546686 1.0 +err_prefin 1458193.5 +err_fin 930175.0 +sparsity check 0.3999999931880406 +time 135.36 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(4.8578e-06) +model.layers.0.self_attn.k_proj.weight tensor(0.0338) +model.layers.0.self_attn.v_proj.weight tensor(0.0718) +model.layers.0.self_attn.o_proj.weight tensor(4.2766e-06) +model.layers.0.mlp.gate_proj.weight tensor(2.7333e-06) +model.layers.0.mlp.up_proj.weight tensor(2.9887e-06) +model.layers.0.mlp.down_proj.weight tensor(0.0261) +50957.80866575241 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 4.265176 diff --git a/logs/llama2-70-0.6-no-final b/logs/llama2-70-0.6-no-final new file mode 100644 index 0000000..743693c --- /dev/null +++ b/logs/llama2-70-0.6-no-final @@ -0,0 +1,2897 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17.612327575683594 +time 74.25 +0 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 31.51104736328125 +time 1.29 +0 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 5.1145734786987305 +time 1.29 +0 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 0.7482728362083435 +time 67.00 +0 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 472.1926574707031 +time 132.74 +0 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 473.89642333984375 +time 133.00 +0 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 8.996854782104492 +time 132.26 +1 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 322.709228515625 +time 74.37 +1 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 329.696533203125 +time 1.31 +1 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36.43440628051758 +time 1.31 +1 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 25.482433319091797 +time 67.22 +1 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3330.572265625 +time 133.13 +1 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3652.72802734375 +time 133.58 +1 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 204.40255737304688 +time 132.78 +2 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1244.264404296875 +time 74.41 +2 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 1398.383544921875 +time 1.33 +2 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 169.42843627929688 +time 1.32 +2 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 125.15476989746094 +time 67.33 +2 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 14816.5078125 +time 133.21 +2 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 15628.775390625 +time 133.57 +2 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 752.6139526367188 +time 132.81 +3 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10678.88671875 +time 74.34 +3 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7112.27734375 +time 1.34 +3 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 2342.0009765625 +time 1.32 +3 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 237.67080688476562 +time 67.23 +3 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 36961.9921875 +time 133.20 +3 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 37519.90625 +time 133.57 +3 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1129.751708984375 +time 132.77 +4 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 22656.783203125 +time 74.31 +4 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 10853.47265625 +time 1.32 +4 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 3980.1181640625 +time 1.32 +4 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 300.5199279785156 +time 67.29 +4 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 65355.2578125 +time 133.21 +4 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 64738.76953125 +time 133.56 +4 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1746.341796875 +time 132.74 +5 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 33657.921875 +time 74.28 +5 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 17652.955078125 +time 1.31 +5 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 5824.9716796875 +time 1.30 +5 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 491.3681640625 +time 67.19 +5 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 85074.578125 +time 133.13 +5 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 83678.984375 +time 133.49 +5 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2472.36328125 +time 132.72 +6 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 38911.2890625 +time 74.35 +6 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 19984.470703125 +time 1.34 +6 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 6915.70263671875 +time 1.32 +6 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 966.6387939453125 +time 67.27 +6 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 116079.0234375 +time 133.18 +6 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 113430.8984375 +time 133.54 +6 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3741.281494140625 +time 132.71 +7 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 63400.828125 +time 74.18 +7 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30530.984375 +time 1.31 +7 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9929.8046875 +time 1.29 +7 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1133.021484375 +time 67.14 +7 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 146139.15625 +time 132.95 +7 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 142522.75 +time 133.31 +7 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 5315.3740234375 +time 132.51 +8 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 55419.0 +time 74.18 +8 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 26642.69921875 +time 1.31 +8 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9577.478515625 +time 1.31 +8 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1881.4942626953125 +time 67.13 +8 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 205496.90625 +time 132.88 +8 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 196712.40625 +time 133.16 +8 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 9158.251953125 +time 132.47 +9 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 62077.3515625 +time 74.20 +9 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30216.158203125 +time 1.33 +9 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14768.783203125 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1096.98828125 +time 67.09 +9 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 267625.78125 +time 132.90 +9 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 254397.75 +time 133.21 +9 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 9453.677734375 +time 132.43 +10 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36566.9453125 +time 74.19 +10 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14957.2578125 +time 1.32 +10 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 14397.6748046875 +time 1.32 +10 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 745.6788940429688 +time 67.11 +10 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 330457.625 +time 132.92 +10 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 312773.96875 +time 133.27 +10 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 10327.0927734375 +time 132.47 +11 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59660.21875 +time 74.18 +11 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 30841.203125 +time 1.31 +11 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16662.66015625 +time 1.30 +11 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1191.482421875 +time 67.06 +11 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 357958.4375 +time 132.88 +11 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 340149.3125 +time 133.22 +11 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 11469.671875 +time 132.46 +12 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 51142.75 +time 74.14 +12 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27139.501953125 +time 1.32 +12 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 12346.076171875 +time 1.30 +12 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 1558.240478515625 +time 67.09 +12 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 354703.1875 +time 132.86 +12 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 339935.875 +time 133.19 +12 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 12818.328125 +time 132.46 +13 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 65248.85546875 +time 74.20 +13 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29296.85546875 +time 1.31 +13 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16889.134765625 +time 1.30 +13 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3927.85791015625 +time 67.10 +13 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 375438.25 +time 132.88 +13 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 363148.0625 +time 133.23 +13 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 16453.6328125 +time 132.43 +14 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 108937.71875 +time 74.18 +14 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49773.47265625 +time 1.31 +14 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23565.927734375 +time 1.30 +14 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4765.5234375 +time 67.15 +14 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 437450.4375 +time 132.88 +14 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 422844.59375 +time 133.26 +14 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 17558.125 +time 132.46 +15 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 79193.8984375 +time 74.16 +15 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 34913.6875 +time 1.33 +15 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29505.78515625 +time 1.30 +15 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4935.31298828125 +time 67.10 +15 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 467670.09375 +time 132.93 +15 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 454470.0 +time 133.30 +15 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 20961.91796875 +time 132.56 +16 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 77807.2890625 +time 74.20 +16 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36305.625 +time 1.33 +16 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27183.01171875 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4128.67822265625 +time 67.15 +16 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 495834.625 +time 133.00 +16 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 483090.375 +time 133.37 +16 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 20891.5625 +time 132.53 +17 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100351.65625 +time 74.21 +17 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 47200.68359375 +time 1.32 +17 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23141.37890625 +time 1.31 +17 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6387.916015625 +time 67.16 +17 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 486892.5 +time 132.90 +17 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 475838.3125 +time 133.24 +17 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 27100.16015625 +time 132.38 +18 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 93711.421875 +time 74.16 +18 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41831.859375 +time 1.31 +18 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36948.33203125 +time 1.31 +18 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6310.619140625 +time 67.14 +18 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 538415.4375 +time 132.90 +18 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 523063.0625 +time 133.15 +18 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 29556.50390625 +time 132.49 +19 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 113524.328125 +time 74.19 +19 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49532.1015625 +time 1.30 +19 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 44482.59375 +time 1.31 +19 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5303.1357421875 +time 67.16 +19 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 585978.25 +time 132.88 +19 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 569840.875 +time 133.26 +19 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 29731.625 +time 132.52 +20 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100755.21875 +time 74.21 +20 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42360.0625 +time 1.33 +20 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36447.08203125 +time 1.32 +20 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 3984.774658203125 +time 67.17 +20 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 633971.25 +time 132.90 +20 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 617294.625 +time 133.20 +20 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 30067.0078125 +time 132.44 +21 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 61406.9296875 +time 74.19 +21 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 23238.681640625 +time 1.32 +21 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 31092.671875 +time 1.30 +21 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5884.35009765625 +time 67.08 +21 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 661759.1875 +time 132.86 +21 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 647559.75 +time 133.24 +21 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 31720.4453125 +time 132.47 +22 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59322.75 +time 74.20 +22 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27160.921875 +time 1.31 +22 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 35606.453125 +time 1.31 +22 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4092.3671875 +time 67.15 +22 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 697785.9375 +time 132.88 +22 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 683844.1875 +time 133.22 +22 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 32690.904296875 +time 132.37 +23 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 87082.828125 +time 74.03 +23 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 43012.2578125 +time 1.30 +23 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37611.08203125 +time 1.30 +23 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5409.3359375 +time 67.00 +23 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 718901.1875 +time 132.71 +23 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 706408.125 +time 133.06 +23 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 33584.59375 +time 132.28 +24 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 77167.984375 +time 74.24 +24 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37746.296875 +time 1.31 +24 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27919.7109375 +time 1.30 +24 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4681.7431640625 +time 67.21 +24 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 699628.9375 +time 133.10 +24 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 691800.0 +time 133.35 +24 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 33803.04296875 +time 132.64 +25 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 87325.171875 +time 74.24 +25 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42500.3125 +time 1.31 +25 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 29050.744140625 +time 1.30 +25 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11818.7099609375 +time 67.20 +25 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 654517.125 +time 133.11 +25 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 651695.375 +time 133.45 +25 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 47636.0859375 +time 132.68 +26 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 130363.6328125 +time 74.30 +26 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 62085.8203125 +time 1.30 +26 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 43831.43359375 +time 1.31 +26 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12798.46875 +time 67.24 +26 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 748994.75 +time 133.22 +26 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 742715.75 +time 133.55 +26 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 46746.4375 +time 132.76 +27 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 57723.2578125 +time 74.30 +27 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27932.28515625 +time 1.31 +27 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 37825.0703125 +time 1.32 +27 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5532.9150390625 +time 67.29 +27 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 795661.875 +time 133.23 +27 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 788209.0 +time 133.57 +27 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 49022.3125 +time 132.79 +28 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 106256.9921875 +time 74.18 +28 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 54390.6484375 +time 1.34 +28 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 57257.546875 +time 1.30 +28 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9016.744140625 +time 67.15 +28 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 840818.375 +time 132.94 +28 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 833063.1875 +time 133.19 +28 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 51601.015625 +time 132.47 +29 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104595.203125 +time 74.29 +29 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 47897.2421875 +time 1.31 +29 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 62577.046875 +time 1.30 +29 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10879.46484375 +time 67.25 +29 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 892475.9375 +time 133.22 +29 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 882220.875 +time 133.58 +29 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 54406.44140625 +time 132.81 +30 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 52978.29296875 +time 74.31 +30 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 20202.720703125 +time 1.33 +30 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 57756.62890625 +time 1.31 +30 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 5304.2724609375 +time 67.23 +30 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 946987.125 +time 133.20 +30 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 933448.5625 +time 133.55 +30 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 56272.2265625 +time 132.77 +31 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 109427.0546875 +time 74.29 +31 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 48981.40625 +time 1.32 +31 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61405.2109375 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10939.416015625 +time 66.95 +31 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 991568.625 +time 132.51 +31 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 975173.8125 +time 132.88 +31 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 57463.484375 +time 132.16 +32 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 126923.3125 +time 74.24 +32 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 59173.9296875 +time 1.32 +32 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 55641.9375 +time 1.30 +32 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7182.63232421875 +time 67.17 +32 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1016079.75 +time 132.95 +32 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1001333.125 +time 133.21 +32 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 60604.0625 +time 132.44 +33 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 180704.390625 +time 74.20 +33 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 78881.8984375 +time 1.33 +33 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61134.00390625 +time 1.31 +33 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14254.791015625 +time 67.10 +33 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1024896.375 +time 132.88 +33 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1012723.375 +time 133.22 +33 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 69529.09375 +time 132.36 +34 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 61557.296875 +time 74.18 +34 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 25669.53515625 +time 1.36 +34 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 36394.3125 +time 1.32 +34 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9210.3095703125 +time 67.13 +34 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1058720.0 +time 132.87 +34 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1045177.75 +time 133.11 +34 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 75892.5 +time 132.35 +35 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104925.34375 +time 74.15 +35 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 42556.8515625 +time 1.31 +35 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 49808.2109375 +time 1.31 +35 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11742.796875 +time 67.12 +35 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1124256.875 +time 132.88 +35 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1110023.5 +time 133.16 +35 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 80856.0234375 +time 132.38 +36 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100388.4609375 +time 74.01 +36 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 41796.96484375 +time 1.32 +36 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 48720.4453125 +time 1.30 +36 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7499.4580078125 +time 66.95 +36 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1149486.25 +time 132.54 +36 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1136073.5 +time 133.03 +36 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 90240.140625 +time 132.09 +37 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 160800.546875 +time 74.09 +37 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69916.5625 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 62135.01171875 +time 1.30 +37 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 13930.455078125 +time 67.07 +37 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1199847.875 +time 132.88 +37 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1185947.0 +time 133.22 +37 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 98375.5859375 +time 132.39 +38 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 180662.359375 +time 74.13 +38 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77058.484375 +time 1.32 +38 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 64838.8828125 +time 1.29 +38 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 22819.05859375 +time 67.12 +38 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1234275.625 +time 132.86 +38 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1223039.125 +time 133.16 +38 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 110680.078125 +time 132.36 +39 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 221250.59375 +time 74.14 +39 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 102464.453125 +time 1.33 +39 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 70258.5 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 26567.326171875 +time 67.09 +39 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1243297.75 +time 132.83 +39 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1233616.125 +time 133.19 +39 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 135681.84375 +time 132.38 +40 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 206396.625 +time 74.00 +40 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 88803.6953125 +time 1.31 +40 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 80005.4140625 +time 1.30 +40 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 39003.3046875 +time 67.00 +40 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1288138.25 +time 132.61 +40 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1276775.25 +time 133.00 +40 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 154915.09375 +time 131.79 +41 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 166153.84375 +time 74.16 +41 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69553.84375 +time 1.32 +41 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69993.46875 +time 1.31 +41 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 43492.41015625 +time 67.08 +41 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1337140.875 +time 132.84 +41 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1305336.0 +time 133.29 +41 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 183269.5625 +time 132.45 +42 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 192392.625 +time 74.18 +42 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75512.3359375 +time 1.32 +42 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86431.2265625 +time 1.31 +42 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 39116.44140625 +time 67.09 +42 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1442253.25 +time 132.84 +42 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1377874.0 +time 133.33 +42 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 197992.65625 +time 132.48 +43 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 132773.1875 +time 74.17 +43 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 61471.68359375 +time 1.33 +43 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 67454.6875 +time 1.30 +43 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 29826.2265625 +time 67.10 +43 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1498712.75 +time 132.86 +43 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1418535.25 +time 133.23 +43 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 219049.484375 +time 132.40 +44 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 240496.484375 +time 74.17 +44 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 79229.8359375 +time 1.32 +44 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 104943.875 +time 1.30 +44 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 67515.375 +time 67.12 +44 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1641352.375 +time 132.86 +44 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1503583.0 +time 133.21 +44 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 256954.328125 +time 132.38 +45 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 150158.953125 +time 74.16 +45 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 64959.00390625 +time 1.33 +45 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 102122.0 +time 1.31 +45 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 21533.6875 +time 67.08 +45 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1778674.25 +time 132.87 +45 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1609157.75 +time 133.24 +45 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 263130.34375 +time 132.38 +46 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 126961.8515625 +time 74.16 +46 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 51756.03515625 +time 1.33 +46 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 88172.375 +time 1.30 +46 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 28422.970703125 +time 67.09 +46 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1911585.25 +time 132.88 +46 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1703843.375 +time 133.23 +46 mlp.down_proj +Pruning ... +0.39999998467309134 0.2499999701976776 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1452538745832397 0.9709505944546686 1.0 +err_prefin 263536.6875 +time 132.29 +47 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 173263.8125 +time 74.18 +47 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75120.765625 +time 1.32 +47 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 85955.203125 +time 1.31 +47 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 42869.1015625 +time 67.13 +47 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1978742.5 +time 132.83 +47 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1741335.5 +time 133.19 +47 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 297206.8125 +time 132.49 +48 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 85451.9375 +time 74.18 +48 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 28746.822265625 +time 1.32 +48 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 83795.6484375 +time 1.30 +48 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 28718.93359375 +time 67.13 +48 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2072749.25 +time 132.88 +48 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1816530.75 +time 133.19 +48 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 296262.71875 +time 132.30 +49 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 47280.34375 +time 74.17 +49 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 19258.259765625 +time 1.32 +49 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 67885.0 +time 1.31 +49 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 10816.5068359375 +time 67.11 +49 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2155661.5 +time 132.85 +49 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1886680.625 +time 133.26 +49 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 296223.4375 +time 132.35 +50 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 76488.140625 +time 74.18 +50 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 25997.84375 +time 1.32 +50 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 87406.3125 +time 1.30 +50 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 21289.626953125 +time 67.10 +50 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2241165.5 +time 132.87 +50 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1952781.5 +time 133.25 +50 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 297756.46875 +time 132.38 +51 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 99703.515625 +time 74.17 +51 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38889.2421875 +time 1.32 +51 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 91222.671875 +time 1.31 +51 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19600.505859375 +time 67.12 +51 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2306488.75 +time 132.85 +51 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2014633.75 +time 133.21 +51 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 297955.40625 +time 132.35 +52 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 168291.375 +time 74.16 +52 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 50932.2890625 +time 1.33 +52 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 104135.4375 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36862.6953125 +time 67.13 +52 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2366128.0 +time 132.86 +52 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2063428.875 +time 133.21 +52 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 304441.96875 +time 132.46 +53 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 50769.15625 +time 74.16 +53 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 15295.44140625 +time 1.32 +53 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 87299.375 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 15707.03125 +time 67.08 +53 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2430075.0 +time 132.86 +53 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2123971.5 +time 133.18 +53 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 307674.5 +time 132.34 +54 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 59797.8046875 +time 74.17 +54 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 17676.337890625 +time 1.31 +54 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 89841.21875 +time 1.31 +54 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 17624.03515625 +time 67.13 +54 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2491099.5 +time 132.86 +54 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2182816.75 +time 133.24 +54 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 307152.4375 +time 132.47 +55 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 98281.03125 +time 74.13 +55 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 34463.125 +time 1.32 +55 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 117563.0 +time 1.30 +55 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 35912.5390625 +time 67.05 +55 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2539226.0 +time 132.86 +55 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2240838.25 +time 133.19 +55 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 314594.53125 +time 132.37 +56 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 73793.3125 +time 74.18 +56 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27353.845703125 +time 1.31 +56 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 82864.546875 +time 1.29 +56 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 18498.107421875 +time 67.12 +56 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2577073.5 +time 132.84 +56 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2278607.5 +time 133.10 +56 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 318420.875 +time 132.37 +57 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 65738.078125 +time 74.16 +57 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 20980.23828125 +time 1.33 +57 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 106542.828125 +time 1.30 +57 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 19532.16796875 +time 67.11 +57 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2664942.0 +time 132.87 +57 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2360458.5 +time 133.24 +57 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 323856.625 +time 132.46 +58 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 36086.94921875 +time 74.19 +58 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 10523.658203125 +time 1.33 +58 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 76194.921875 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12682.658203125 +time 67.13 +58 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2722121.0 +time 132.89 +58 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2416679.5 +time 133.28 +58 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 325329.375 +time 132.41 +59 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 46806.7265625 +time 74.15 +59 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 13380.953125 +time 1.32 +59 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 84532.6875 +time 1.31 +59 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 12104.3583984375 +time 67.07 +59 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2788820.75 +time 132.85 +59 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2478200.5 +time 133.25 +59 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 324977.9375 +time 132.39 +60 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7371.5859375 +time 74.18 +60 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 1918.373779296875 +time 1.32 +60 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 45889.3359375 +time 1.31 +60 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 7125.640625 +time 67.15 +60 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2827728.0 +time 132.91 +60 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2526013.25 +time 133.28 +60 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 322436.03125 +time 131.98 +61 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 29236.283203125 +time 74.19 +61 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7915.310546875 +time 1.31 +61 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 78067.921875 +time 1.30 +61 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11780.419921875 +time 67.12 +61 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2881668.25 +time 132.90 +61 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2586985.5 +time 133.26 +61 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 323645.25 +time 132.36 +62 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 31343.787109375 +time 74.19 +62 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 9455.978515625 +time 1.33 +62 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 84606.421875 +time 1.31 +62 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9645.7236328125 +time 67.06 +62 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2944337.75 +time 132.85 +62 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2652504.0 +time 133.21 +62 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 326877.1875 +time 132.49 +63 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 28740.30078125 +time 74.16 +63 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 7151.73046875 +time 1.31 +63 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 65387.8359375 +time 1.30 +63 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 8003.251953125 +time 67.08 +63 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3011278.0 +time 132.83 +63 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2716184.5 +time 133.23 +63 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 330764.0625 +time 132.41 +64 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 54376.578125 +time 74.14 +64 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 16704.046875 +time 1.32 +64 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 95303.6953125 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 16367.1484375 +time 67.06 +64 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3071841.0 +time 132.85 +64 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2780696.5 +time 133.23 +64 mlp.down_proj +Pruning ... +0.39999998467309134 0.2499999850988388 0.32857141750199453 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253876941607 0.9709505944546686 1.0 +err_prefin 335757.34375 +time 132.46 +65 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14479.54296875 +time 74.15 +65 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 3315.9599609375 +time 1.31 +65 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 52430.859375 +time 1.30 +65 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 6687.37158203125 +time 67.10 +65 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3122902.0 +time 132.85 +65 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2839093.5 +time 133.20 +65 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 340854.46875 +time 132.36 +66 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24763.19921875 +time 74.15 +66 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 6424.70751953125 +time 1.31 +66 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 69690.8828125 +time 1.30 +66 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 14345.8642578125 +time 67.12 +66 mlp.gate_proj +Pruning ... +0.39999998467309134 0.2499999701976776 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.1452538745832397 0.9709505944546686 1.0 +err_prefin 3198961.75 +time 132.83 +66 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2925467.0 +time 133.20 +66 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 357014.3125 +time 132.38 +67 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 11864.314453125 +time 74.16 +67 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 2948.201171875 +time 1.32 +67 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 27720.13671875 +time 1.31 +67 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 4095.9384765625 +time 67.09 +67 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3223908.25 +time 132.86 +67 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2971735.25 +time 133.20 +67 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 357864.375 +time 132.34 +68 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 104560.09375 +time 74.17 +68 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38269.0546875 +time 1.33 +68 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 137282.203125 +time 1.30 +68 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 15658.8642578125 +time 67.14 +68 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3347816.0 +time 132.88 +68 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3099588.75 +time 133.25 +68 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 384504.1875 +time 132.38 +69 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 179176.3125 +time 74.15 +69 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 86904.4140625 +time 1.32 +69 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 180676.28125 +time 1.30 +69 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 25021.240234375 +time 67.09 +69 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3413536.0 +time 132.88 +69 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3192965.0 +time 133.26 +69 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 403899.0 +time 132.39 +70 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 100739.75 +time 74.20 +70 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 38826.25 +time 1.33 +70 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 102769.8203125 +time 1.30 +70 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 18622.0390625 +time 67.12 +70 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3491604.5 +time 132.86 +70 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3285257.5 +time 133.36 +70 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 431766.75 +time 132.32 +71 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 139430.90625 +time 74.17 +71 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 65701.375 +time 1.31 +71 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 110579.6484375 +time 1.30 +71 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 24041.388671875 +time 67.11 +71 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3603060.75 +time 132.87 +71 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3395588.5 +time 133.19 +71 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 456766.0 +time 132.33 +72 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 196384.375 +time 74.21 +72 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 92021.578125 +time 1.32 +72 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 129053.34375 +time 1.31 +72 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 31116.056640625 +time 67.11 +72 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3686122.5 +time 132.90 +72 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3498898.5 +time 133.15 +72 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 491849.6875 +time 132.31 +73 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 182664.953125 +time 74.16 +73 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 89948.421875 +time 1.33 +73 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 147008.59375 +time 1.31 +73 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 20152.54296875 +time 67.12 +73 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3775901.0 +time 132.87 +73 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3598868.5 +time 133.26 +73 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 542224.375 +time 132.44 +74 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 165940.3125 +time 74.19 +74 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 75133.015625 +time 1.33 +74 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 111994.921875 +time 1.31 +74 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 41080.0625 +time 67.13 +74 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3864722.0 +time 132.88 +74 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3695610.5 +time 133.24 +74 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 599601.125 +time 132.37 +75 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 168422.0625 +time 74.16 +75 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 73985.390625 +time 1.32 +75 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 126582.671875 +time 1.29 +75 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 37896.7421875 +time 67.14 +75 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3896304.0 +time 132.89 +75 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3721670.0 +time 133.25 +75 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 673825.3125 +time 132.42 +76 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 253578.78125 +time 74.18 +76 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 108152.5625 +time 1.32 +76 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 202025.6875 +time 1.30 +76 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 124319.3671875 +time 67.15 +76 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3856496.5 +time 132.88 +76 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3656621.0 +time 133.26 +76 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 813325.5 +time 132.37 +77 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 167027.765625 +time 74.20 +77 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 77202.765625 +time 1.31 +77 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 135316.703125 +time 1.31 +77 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 54281.9140625 +time 67.13 +77 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3588503.25 +time 132.91 +77 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 3403949.5 +time 133.28 +77 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 934467.25 +time 132.35 +78 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 148941.203125 +time 74.19 +78 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 73460.6328125 +time 1.33 +78 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 119373.671875 +time 1.31 +78 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 33232.4609375 +time 67.18 +78 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2989115.0 +time 132.91 +78 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 2800694.0 +time 133.28 +78 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 982731.75 +time 132.28 +79 self_attn.q_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 93165.2265625 +time 74.18 +79 self_attn.k_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 57591.75 +time 1.34 +79 self_attn.v_proj +Pruning ... +0.399999737739563 0.2499990463256836 0.36874985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 1.0511169376167957 0.9709505944546686 1.0 +err_prefin 52218.36328125 +time 1.31 +79 self_attn.o_proj +Pruning ... +0.3999999612569809 0.15999998152256012 0.23999997973442078 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.4293497561202342 0.9709505944546686 1.0 +err_prefin 9484.1318359375 +time 67.13 +79 mlp.gate_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1811554.75 +time 132.96 +79 mlp.up_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 1657092.0 +time 133.33 +79 mlp.down_proj +Pruning ... +0.399999988930566 0.2499999850988388 0.3285714217594692 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.145253881331178 0.9709505944546686 1.0 +err_prefin 818149.125 +time 132.49 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0106) +model.layers.0.self_attn.k_proj.weight tensor(0.0180) +model.layers.0.self_attn.v_proj.weight tensor(0.0582) +model.layers.0.self_attn.o_proj.weight tensor(4.1723e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0088) +49795.538183927536 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 3.814271 diff --git a/logs/llama2-70-0.7 b/logs/llama2-70-0.7 new file mode 100644 index 0000000..e7c8c9a --- /dev/null +++ b/logs/llama2-70-0.7 @@ -0,0 +1,4017 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 31.5526065826416 +err_fin 13.801063537597656 +sparsity check 0.2999999672174454 +time 75.95 +0 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52.44609832763672 +err_fin 19.002490997314453 +sparsity check 0.2999997138977051 +time 1.32 +0 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 11.234882354736328 +err_fin 6.049235820770264 +sparsity check 0.2999997138977051 +time 1.32 +0 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2.254166603088379 +err_fin 0.19925557076931 +sparsity check 0.2999999672174454 +time 68.70 +0 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 998.2425537109375 +err_fin 420.0853271484375 +sparsity check 0.2999999906335558 +time 138.62 +0 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1007.9627075195312 +err_fin 419.9471130371094 +sparsity check 0.2999999906335558 +time 138.91 +0 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 19.838790893554688 +err_fin 9.329728126525879 +sparsity check 0.2999999906335558 +time 136.67 +1 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 559.4142456054688 +err_fin 195.34976196289062 +sparsity check 0.2999999672174454 +time 75.94 +1 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 666.58203125 +err_fin 232.85142517089844 +sparsity check 0.2999997138977051 +time 1.34 +1 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 80.73096466064453 +err_fin 40.32262420654297 +sparsity check 0.2999997138977051 +time 1.34 +1 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66.36351776123047 +err_fin 16.87843132019043 +sparsity check 0.2999999672174454 +time 68.82 +1 mlp.gate_proj +Pruning ... +0.2999999863760812 0.2499999850988388 0.2285714192049844 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063847936203 0.9709505944546686 1.0 +err_prefin 7393.64404296875 +err_fin 2988.91845703125 +sparsity check 0.2999999863760812 +time 138.78 +1 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 8189.357421875 +err_fin 3221.509521484375 +sparsity check 0.2999999906335558 +time 139.09 +1 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 407.28009033203125 +err_fin 318.79150390625 +sparsity check 0.2999999906335558 +time 136.83 +2 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2488.8447265625 +err_fin 1380.255859375 +sparsity check 0.2999999672174454 +time 76.06 +2 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 2615.38037109375 +err_fin 1290.1136474609375 +sparsity check 0.2999997138977051 +time 1.35 +2 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 355.3388671875 +err_fin 225.45562744140625 +sparsity check 0.2999997138977051 +time 1.34 +2 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 306.25860595703125 +err_fin 133.38070678710938 +sparsity check 0.2999999672174454 +time 68.94 +2 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 32337.75390625 +err_fin 16730.00390625 +sparsity check 0.2999999906335558 +time 138.81 +2 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 34219.4921875 +err_fin 17309.54296875 +sparsity check 0.2999999906335558 +time 139.17 +2 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1462.96923828125 +err_fin 1230.7198486328125 +sparsity check 0.2999999906335558 +time 136.94 +3 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 23905.779296875 +err_fin 15115.546875 +sparsity check 0.2999999672174454 +time 75.97 +3 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 14812.9462890625 +err_fin 9731.453125 +sparsity check 0.2999997138977051 +time 1.38 +3 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 4812.263671875 +err_fin 3350.51123046875 +sparsity check 0.2999997138977051 +time 1.33 +3 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 553.1149291992188 +err_fin 313.97454833984375 +sparsity check 0.2999999672174454 +time 68.91 +3 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 78134.578125 +err_fin 46317.03125 +sparsity check 0.2999999906335558 +time 138.82 +3 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 79425.5546875 +err_fin 46321.234375 +sparsity check 0.2999999906335558 +time 139.23 +3 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2157.4443359375 +err_fin 1896.1953125 +sparsity check 0.2999999906335558 +time 136.80 +4 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 49349.1171875 +err_fin 32225.453125 +sparsity check 0.2999999672174454 +time 75.93 +4 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 21972.5625 +err_fin 15072.583984375 +sparsity check 0.2999997138977051 +time 1.35 +4 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 7872.7421875 +err_fin 5850.16162109375 +sparsity check 0.2999997138977051 +time 1.34 +4 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 687.8417358398438 +err_fin 407.13275146484375 +sparsity check 0.2999999672174454 +time 68.89 +4 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 134429.28125 +err_fin 86310.09375 +sparsity check 0.2999999906335558 +time 138.78 +4 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 133324.328125 +err_fin 84613.71875 +sparsity check 0.2999999906335558 +time 139.11 +4 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3295.36669921875 +err_fin 2941.990966796875 +sparsity check 0.2999999906335558 +time 136.62 +5 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 74200.90625 +err_fin 51407.30859375 +sparsity check 0.2999999672174454 +time 75.89 +5 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 35844.0234375 +err_fin 26020.1484375 +sparsity check 0.2999997138977051 +time 1.36 +5 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 11364.1396484375 +err_fin 8919.3125 +sparsity check 0.2999997138977051 +time 1.32 +5 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 1060.3212890625 +err_fin 600.115966796875 +sparsity check 0.2999999672174454 +time 68.86 +5 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 171220.1875 +err_fin 115109.125 +sparsity check 0.2999999906335558 +time 138.81 +5 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 168897.5625 +err_fin 112407.5625 +sparsity check 0.2999999906335558 +time 139.11 +5 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4605.4111328125 +err_fin 4083.410400390625 +sparsity check 0.2999999906335558 +time 136.74 +6 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 84691.625 +err_fin 60449.29296875 +sparsity check 0.2999999672174454 +time 75.95 +6 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 39424.7890625 +err_fin 29796.72265625 +sparsity check 0.2999997138977051 +time 1.35 +6 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 13354.880859375 +err_fin 10822.541015625 +sparsity check 0.2999997138977051 +time 1.33 +6 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2051.59423828125 +err_fin 1327.2574462890625 +sparsity check 0.2999999672174454 +time 68.87 +6 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 230577.90625 +err_fin 158950.140625 +sparsity check 0.2999999906335558 +time 138.78 +6 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 225769.265625 +err_fin 154366.09375 +sparsity check 0.2999999906335558 +time 139.15 +6 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6892.95654296875 +err_fin 6190.93994140625 +sparsity check 0.2999999906335558 +time 136.72 +7 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 138298.125 +err_fin 103012.3984375 +sparsity check 0.2999999672174454 +time 75.95 +7 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 59487.4140625 +err_fin 47204.64453125 +sparsity check 0.2999997138977051 +time 1.36 +7 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 18909.47265625 +err_fin 15829.427734375 +sparsity check 0.2999997138977051 +time 1.33 +7 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2414.797607421875 +err_fin 1489.6678466796875 +sparsity check 0.2999999672174454 +time 68.86 +7 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 287567.875 +err_fin 203346.28125 +sparsity check 0.2999999906335558 +time 138.78 +7 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 280876.90625 +err_fin 197131.953125 +sparsity check 0.2999999906335558 +time 139.12 +7 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 9785.451171875 +err_fin 8780.009765625 +sparsity check 0.2999999906335558 +time 136.69 +8 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 119541.5546875 +err_fin 90286.796875 +sparsity check 0.2999999672174454 +time 75.92 +8 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52394.3125 +err_fin 41045.8828125 +sparsity check 0.2999997138977051 +time 1.34 +8 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 18216.1171875 +err_fin 15168.751953125 +sparsity check 0.2999997138977051 +time 1.34 +8 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 4256.765625 +err_fin 2574.2548828125 +sparsity check 0.2999999672174454 +time 68.84 +8 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 400993.75 +err_fin 292364.25 +sparsity check 0.2999999906335558 +time 138.76 +8 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 384228.6875 +err_fin 278253.0 +sparsity check 0.2999999906335558 +time 139.13 +8 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 16458.263671875 +err_fin 15318.830078125 +sparsity check 0.2999999906335558 +time 136.74 +9 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 132341.046875 +err_fin 105049.015625 +sparsity check 0.2999999672174454 +time 75.96 +9 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57340.03125 +err_fin 48335.9765625 +sparsity check 0.2999997138977051 +time 1.38 +9 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 27674.234375 +err_fin 24191.828125 +sparsity check 0.2999997138977051 +time 1.33 +9 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2507.3154296875 +err_fin 1418.0616455078125 +sparsity check 0.2999999672174454 +time 68.84 +9 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 518084.46875 +err_fin 386963.375 +sparsity check 0.2999999906335558 +time 138.81 +9 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 493708.6875 +err_fin 366268.28125 +sparsity check 0.2999999906335558 +time 139.11 +9 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 17603.853515625 +err_fin 16046.490234375 +sparsity check 0.2999999906335558 +time 136.75 +10 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 74944.375 +err_fin 59686.69921875 +sparsity check 0.2999999672174454 +time 75.98 +10 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 30239.62890625 +err_fin 24843.46875 +sparsity check 0.2999997138977051 +time 1.34 +10 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 26962.50390625 +err_fin 23496.552734375 +sparsity check 0.2999997138977051 +time 1.34 +10 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 1685.6614990234375 +err_fin 1013.326904296875 +sparsity check 0.2999999672174454 +time 68.85 +10 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 635107.875 +err_fin 491833.0625 +sparsity check 0.2999999906335558 +time 138.80 +10 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 602855.25 +err_fin 464191.0625 +sparsity check 0.2999999906335558 +time 139.15 +10 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 19038.14453125 +err_fin 17591.33984375 +sparsity check 0.2999999906335558 +time 136.66 +11 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 125451.3046875 +err_fin 100865.71875 +sparsity check 0.2999999672174454 +time 75.95 +11 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 59601.6796875 +err_fin 49557.6640625 +sparsity check 0.2999997138977051 +time 1.37 +11 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 31191.890625 +err_fin 27034.7109375 +sparsity check 0.2999997138977051 +time 1.33 +11 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2637.687255859375 +err_fin 1539.096923828125 +sparsity check 0.2999999672174454 +time 68.86 +11 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 684320.9375 +err_fin 534864.75 +sparsity check 0.2999999906335558 +time 138.80 +11 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 652150.8125 +err_fin 507336.3125 +sparsity check 0.2999999906335558 +time 139.10 +11 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 21235.642578125 +err_fin 19640.88671875 +sparsity check 0.2999999906335558 +time 136.72 +12 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 107555.765625 +err_fin 86075.53125 +sparsity check 0.2999999672174454 +time 75.97 +12 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52001.375 +err_fin 42313.7890625 +sparsity check 0.2999997138977051 +time 1.35 +12 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 23119.60546875 +err_fin 19647.97265625 +sparsity check 0.2999997138977051 +time 1.34 +12 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 3262.0234375 +err_fin 1880.873046875 +sparsity check 0.2999999672174454 +time 68.89 +12 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 677572.875 +err_fin 530410.9375 +sparsity check 0.2999999906335558 +time 138.82 +12 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 651519.0625 +err_fin 507847.25 +sparsity check 0.2999999906335558 +time 139.11 +12 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 23535.591796875 +err_fin 21650.58984375 +sparsity check 0.2999999906335558 +time 136.72 +13 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 135881.90625 +err_fin 109252.109375 +sparsity check 0.2999999672174454 +time 75.92 +13 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57302.16796875 +err_fin 47143.3359375 +sparsity check 0.2999997138977051 +time 1.37 +13 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 31665.98046875 +err_fin 27384.41796875 +sparsity check 0.2999997138977051 +time 1.32 +13 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8571.7724609375 +err_fin 5393.1240234375 +sparsity check 0.2999999672174454 +time 68.82 +13 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 715876.1875 +err_fin 553550.3125 +sparsity check 0.2999999906335558 +time 139.03 +13 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 694280.5 +err_fin 534421.25 +sparsity check 0.2999999906335558 +time 139.33 +13 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 30492.3515625 +err_fin 28018.44921875 +sparsity check 0.2999999906335558 +time 136.91 +14 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 233549.734375 +err_fin 189442.5 +sparsity check 0.2999999672174454 +time 76.14 +14 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 95989.8828125 +err_fin 79835.6796875 +sparsity check 0.2999997138977051 +time 1.36 +14 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 44033.52734375 +err_fin 38565.3984375 +sparsity check 0.2999997138977051 +time 1.35 +14 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 9570.501953125 +err_fin 5837.42431640625 +sparsity check 0.2999999672174454 +time 69.01 +14 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 833335.25 +err_fin 657834.4375 +sparsity check 0.2999999906335558 +time 139.19 +14 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 808530.1875 +err_fin 635628.875 +sparsity check 0.2999999906335558 +time 139.55 +14 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 32437.970703125 +err_fin 30035.171875 +sparsity check 0.2999999906335558 +time 137.05 +15 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 165543.4375 +err_fin 135696.78125 +sparsity check 0.2999999672174454 +time 76.16 +15 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 67497.59375 +err_fin 57078.5703125 +sparsity check 0.2999997138977051 +time 1.36 +15 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 55109.40234375 +err_fin 48422.890625 +sparsity check 0.2999997138977051 +time 1.35 +15 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 10721.1318359375 +err_fin 6648.1845703125 +sparsity check 0.2999999672174454 +time 69.04 +15 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 890758.75 +err_fin 703880.0625 +sparsity check 0.2999999906335558 +time 139.18 +15 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 868523.375 +err_fin 682637.25 +sparsity check 0.2999999906335558 +time 139.56 +15 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 38626.4609375 +err_fin 35541.078125 +sparsity check 0.2999999906335558 +time 137.10 +16 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 161715.25 +err_fin 134021.0625 +sparsity check 0.2999999672174454 +time 76.14 +16 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 71157.5390625 +err_fin 60193.890625 +sparsity check 0.2999997138977051 +time 1.37 +16 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 50627.34765625 +err_fin 44834.8359375 +sparsity check 0.2999997138977051 +time 1.35 +16 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8355.0703125 +err_fin 5260.6513671875 +sparsity check 0.2999999672174454 +time 69.03 +16 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 943084.25 +err_fin 753712.125 +sparsity check 0.2999999906335558 +time 139.15 +16 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 921673.875 +err_fin 734021.4375 +sparsity check 0.2999999906335558 +time 139.48 +16 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 38418.8125 +err_fin 35650.9375 +sparsity check 0.2999999906335558 +time 137.06 +17 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 212698.5625 +err_fin 174037.84375 +sparsity check 0.2999999672174454 +time 76.04 +17 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 91215.4375 +err_fin 76150.0 +sparsity check 0.2999997138977051 +time 1.36 +17 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 43756.53125 +err_fin 37838.7734375 +sparsity check 0.2999997138977051 +time 1.33 +17 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14142.9765625 +err_fin 7767.17138671875 +sparsity check 0.2999999672174454 +time 68.93 +17 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 925419.9375 +err_fin 726249.9375 +sparsity check 0.2999999906335558 +time 139.10 +17 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 907551.6875 +err_fin 708821.875 +sparsity check 0.2999999906335558 +time 139.44 +17 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 50562.96875 +err_fin 45572.140625 +sparsity check 0.2999999906335558 +time 136.92 +18 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 195582.46875 +err_fin 161440.25 +sparsity check 0.2999999672174454 +time 75.98 +18 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 81771.046875 +err_fin 69608.484375 +sparsity check 0.2999997138977051 +time 1.36 +18 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68762.984375 +err_fin 61125.35546875 +sparsity check 0.2999997138977051 +time 1.33 +18 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 13785.78125 +err_fin 8140.5859375 +sparsity check 0.2999999672174454 +time 68.90 +18 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1021166.6875 +err_fin 808623.375 +sparsity check 0.2999999906335558 +time 138.87 +18 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 995050.1875 +err_fin 783959.0 +sparsity check 0.2999999906335558 +time 139.23 +18 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 54236.234375 +err_fin 49195.7890625 +sparsity check 0.2999999906335558 +time 136.76 +19 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 236991.8125 +err_fin 198120.375 +sparsity check 0.2999999672174454 +time 75.99 +19 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 95409.734375 +err_fin 83244.34375 +sparsity check 0.2999997138977051 +time 1.37 +19 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 81631.5546875 +err_fin 74011.890625 +sparsity check 0.2999997138977051 +time 1.34 +19 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 12312.45703125 +err_fin 7862.009765625 +sparsity check 0.2999999672174454 +time 68.86 +19 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1110722.75 +err_fin 889099.75 +sparsity check 0.2999999906335558 +time 138.99 +19 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1082564.125 +err_fin 863356.6875 +sparsity check 0.2999999906335558 +time 139.31 +19 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 53912.125 +err_fin 49698.0625 +sparsity check 0.2999999906335558 +time 136.83 +20 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 206513.515625 +err_fin 174613.0 +sparsity check 0.2999999672174454 +time 75.96 +20 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 82910.4140625 +err_fin 72427.0859375 +sparsity check 0.2999997138977051 +time 1.40 +20 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 66453.0859375 +err_fin 60729.4765625 +sparsity check 0.2999997138977051 +time 1.33 +20 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8775.353515625 +err_fin 5111.00732421875 +sparsity check 0.2999999672174454 +time 68.86 +20 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1194548.25 +err_fin 961933.375 +sparsity check 0.2999999906335558 +time 138.83 +20 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1165982.375 +err_fin 935335.5 +sparsity check 0.2999999906335558 +time 139.24 +20 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 54315.2578125 +err_fin 50397.1640625 +sparsity check 0.2999999906335558 +time 136.77 +21 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 121441.5390625 +err_fin 102211.625 +sparsity check 0.2999999672174454 +time 76.00 +21 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 47674.41796875 +err_fin 40964.671875 +sparsity check 0.2999997138977051 +time 1.36 +21 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 56955.3671875 +err_fin 51291.2890625 +sparsity check 0.2999997138977051 +time 1.35 +21 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 12005.697265625 +err_fin 7319.1103515625 +sparsity check 0.2999999672174454 +time 68.89 +21 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1243470.0 +err_fin 1001291.0 +sparsity check 0.2999999906335558 +time 138.83 +21 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1219399.375 +err_fin 978155.125 +sparsity check 0.2999999906335558 +time 139.20 +21 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 57454.31640625 +err_fin 53498.1171875 +sparsity check 0.2999999906335558 +time 136.75 +22 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 116548.296875 +err_fin 98789.25 +sparsity check 0.2999999672174454 +time 75.97 +22 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 53585.62890625 +err_fin 46771.4140625 +sparsity check 0.2999997138977051 +time 1.37 +22 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 64866.82421875 +err_fin 59049.6640625 +sparsity check 0.2999997138977051 +time 1.34 +22 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8473.02734375 +err_fin 5000.4384765625 +sparsity check 0.2999999672174454 +time 68.90 +22 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1304645.75 +err_fin 1051347.5 +sparsity check 0.2999999906335558 +time 138.87 +22 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1282196.125 +err_fin 1029592.375 +sparsity check 0.2999999906335558 +time 139.18 +22 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 58738.921875 +err_fin 54899.90625 +sparsity check 0.2999999906335558 +time 136.71 +23 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 177000.34375 +err_fin 148722.3125 +sparsity check 0.2999999672174454 +time 75.96 +23 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 83508.140625 +err_fin 72323.1171875 +sparsity check 0.2999997138977051 +time 1.34 +23 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68578.0234375 +err_fin 62196.5703125 +sparsity check 0.2999997138977051 +time 1.33 +23 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11660.74609375 +err_fin 6889.0078125 +sparsity check 0.2999999672174454 +time 68.90 +23 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1337517.5 +err_fin 1078256.25 +sparsity check 0.2999999906335558 +time 138.82 +23 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1317734.75 +err_fin 1058014.5 +sparsity check 0.2999999906335558 +time 139.10 +23 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 60790.53515625 +err_fin 56594.65234375 +sparsity check 0.2999999906335558 +time 136.74 +24 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 156468.140625 +err_fin 130504.625 +sparsity check 0.2999999672174454 +time 75.99 +24 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 73629.4375 +err_fin 63010.8984375 +sparsity check 0.2999997138977051 +time 1.38 +24 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 51387.546875 +err_fin 45452.140625 +sparsity check 0.2999997138977051 +time 1.33 +24 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 10108.07421875 +err_fin 6297.9501953125 +sparsity check 0.2999999672174454 +time 68.88 +24 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1303813.0 +err_fin 1052222.75 +sparsity check 0.2999999906335558 +time 138.82 +24 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1292389.875 +err_fin 1040395.8125 +sparsity check 0.2999999906335558 +time 139.15 +24 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 61466.703125 +err_fin 57394.7421875 +sparsity check 0.2999999906335558 +time 136.67 +25 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 178466.796875 +err_fin 148865.34375 +sparsity check 0.2999999672174454 +time 75.99 +25 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 83107.234375 +err_fin 70354.984375 +sparsity check 0.2999997138977051 +time 1.35 +25 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 53946.8515625 +err_fin 47403.734375 +sparsity check 0.2999997138977051 +time 1.32 +25 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 28022.23046875 +err_fin 17113.30078125 +sparsity check 0.2999999672174454 +time 68.86 +25 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1236659.25 +err_fin 973750.0625 +sparsity check 0.2999999906335558 +time 138.74 +25 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1234294.375 +err_fin 968615.1875 +sparsity check 0.2999999906335558 +time 139.09 +25 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 86355.53125 +err_fin 79051.78125 +sparsity check 0.2999999906335558 +time 136.91 +26 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 272898.78125 +err_fin 227520.6875 +sparsity check 0.2999999672174454 +time 75.93 +26 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 120087.921875 +err_fin 103409.90625 +sparsity check 0.2999997138977051 +time 1.37 +26 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 81232.515625 +err_fin 72057.2734375 +sparsity check 0.2999997138977051 +time 1.34 +26 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 26970.46484375 +err_fin 17077.2265625 +sparsity check 0.2999999672174454 +time 68.85 +26 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1413183.75 +err_fin 1126928.5 +sparsity check 0.2999999906335558 +time 138.83 +26 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1405452.5 +err_fin 1116377.125 +sparsity check 0.2999999906335558 +time 139.12 +26 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 85323.671875 +err_fin 79784.0625 +sparsity check 0.2999999906335558 +time 136.91 +27 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 115558.09375 +err_fin 97767.78125 +sparsity check 0.2999999672174454 +time 75.97 +27 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 56699.1953125 +err_fin 49213.171875 +sparsity check 0.2999997138977051 +time 1.35 +27 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68962.90625 +err_fin 63054.6484375 +sparsity check 0.2999997138977051 +time 1.34 +27 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11683.6630859375 +err_fin 6646.47314453125 +sparsity check 0.2999999672174454 +time 68.86 +27 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1501156.25 +err_fin 1203228.25 +sparsity check 0.2999999906335558 +time 138.84 +27 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1491076.625 +err_fin 1190134.875 +sparsity check 0.2999999906335558 +time 139.08 +27 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 90162.8984375 +err_fin 84577.515625 +sparsity check 0.2999999906335558 +time 136.92 +28 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 218782.3125 +err_fin 185109.6875 +sparsity check 0.2999999672174454 +time 75.96 +28 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 105500.078125 +err_fin 92746.3125 +sparsity check 0.2999997138977051 +time 1.34 +28 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 105374.5 +err_fin 96035.859375 +sparsity check 0.2999997138977051 +time 1.33 +28 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 19544.38671875 +err_fin 12621.37890625 +sparsity check 0.2999999672174454 +time 68.87 +28 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1587041.75 +err_fin 1279147.75 +sparsity check 0.2999999906335558 +time 138.87 +28 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1575474.875 +err_fin 1265067.375 +sparsity check 0.2999999906335558 +time 139.14 +28 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 95820.46875 +err_fin 90217.296875 +sparsity check 0.2999999906335558 +time 136.90 +29 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 213201.5 +err_fin 181015.28125 +sparsity check 0.2999999672174454 +time 76.00 +29 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 95993.078125 +err_fin 84196.1015625 +sparsity check 0.2999997138977051 +time 1.36 +29 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 114659.6875 +err_fin 104625.4375 +sparsity check 0.2999997138977051 +time 1.33 +29 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 22542.279296875 +err_fin 15099.7080078125 +sparsity check 0.2999999672174454 +time 68.81 +29 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1681974.875 +err_fin 1361460.25 +sparsity check 0.2999999906335558 +time 138.80 +29 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1667598.75 +err_fin 1344047.0 +sparsity check 0.2999999906335558 +time 139.12 +29 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 101028.2734375 +err_fin 95562.359375 +sparsity check 0.2999999906335558 +time 136.77 +30 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 103397.0 +err_fin 88246.9765625 +sparsity check 0.2999999672174454 +time 76.03 +30 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 40666.4921875 +err_fin 35697.8671875 +sparsity check 0.2999997138977051 +time 1.36 +30 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 104980.0078125 +err_fin 96601.2421875 +sparsity check 0.2999997138977051 +time 1.33 +30 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 13561.2197265625 +err_fin 8894.8076171875 +sparsity check 0.2999999672174454 +time 68.89 +30 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1786091.5 +err_fin 1452231.25 +sparsity check 0.2999999906335558 +time 138.83 +30 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1764579.25 +err_fin 1428528.0 +sparsity check 0.2999999906335558 +time 139.25 +30 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 104187.3828125 +err_fin 98655.8359375 +sparsity check 0.2999999906335558 +time 136.77 +31 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 221379.375 +err_fin 188682.6875 +sparsity check 0.2999999672174454 +time 75.98 +31 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 97616.9296875 +err_fin 85803.203125 +sparsity check 0.2999997138977051 +time 1.36 +31 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 113328.21875 +err_fin 101994.34375 +sparsity check 0.2999997138977051 +time 1.33 +31 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 25169.37890625 +err_fin 17087.880859375 +sparsity check 0.2999999672174454 +time 68.82 +31 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1866266.75 +err_fin 1528834.625 +sparsity check 0.2999999906335558 +time 138.85 +31 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1838902.5 +err_fin 1500553.625 +sparsity check 0.2999999906335558 +time 139.15 +31 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 105678.5 +err_fin 100301.6015625 +sparsity check 0.2999999906335558 +time 136.69 +32 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 262803.875 +err_fin 224880.0 +sparsity check 0.2999999672174454 +time 75.97 +32 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 113062.328125 +err_fin 100499.796875 +sparsity check 0.2999997138977051 +time 1.35 +32 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 100919.734375 +err_fin 92579.75 +sparsity check 0.2999997138977051 +time 1.34 +32 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15704.1376953125 +err_fin 10146.11328125 +sparsity check 0.2999999672174454 +time 68.87 +32 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1915143.25 +err_fin 1565343.0 +sparsity check 0.2999999906335558 +time 138.87 +32 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1892121.5 +err_fin 1540707.0 +sparsity check 0.2999999906335558 +time 139.24 +32 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 112154.046875 +err_fin 106190.984375 +sparsity check 0.2999999906335558 +time 136.78 +33 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 380957.1875 +err_fin 323374.1875 +sparsity check 0.2999999672174454 +time 76.04 +33 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152019.015625 +err_fin 133161.09375 +sparsity check 0.2999997138977051 +time 1.35 +33 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 112660.0546875 +err_fin 101788.078125 +sparsity check 0.2999997138977051 +time 1.34 +33 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 32713.7578125 +err_fin 20666.58203125 +sparsity check 0.2999999672174454 +time 68.83 +33 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1935303.0 +err_fin 1560679.875 +sparsity check 0.2999999906335558 +time 138.96 +33 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1917896.0 +err_fin 1540437.5 +sparsity check 0.2999999906335558 +time 139.28 +33 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 128665.703125 +err_fin 121072.546875 +sparsity check 0.2999999906335558 +time 136.83 +34 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 122775.3359375 +err_fin 103060.7265625 +sparsity check 0.2999999672174454 +time 75.95 +34 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 53525.4609375 +err_fin 45110.37109375 +sparsity check 0.2999997138977051 +time 1.35 +34 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 67481.703125 +err_fin 59663.5234375 +sparsity check 0.2999997138977051 +time 1.35 +34 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 21456.5234375 +err_fin 13878.259765625 +sparsity check 0.2999999672174454 +time 68.86 +34 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2000206.25 +err_fin 1622827.0 +sparsity check 0.2999999906335558 +time 138.82 +34 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1980603.25 +err_fin 1600789.125 +sparsity check 0.2999999906335558 +time 139.12 +34 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 140142.703125 +err_fin 132631.9375 +sparsity check 0.2999999906335558 +time 136.79 +35 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 214924.96875 +err_fin 182660.640625 +sparsity check 0.2999999672174454 +time 76.11 +35 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 84956.5703125 +err_fin 73921.9296875 +sparsity check 0.2999997138977051 +time 1.35 +35 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 91451.0703125 +err_fin 82992.3828125 +sparsity check 0.2999997138977051 +time 1.34 +35 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 25634.619140625 +err_fin 15644.27734375 +sparsity check 0.2999999672174454 +time 68.99 +35 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2124431.5 +err_fin 1726037.875 +sparsity check 0.2999999906335558 +time 139.11 +35 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2101923.75 +err_fin 1701514.0 +sparsity check 0.2999999906335558 +time 139.44 +35 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 150777.953125 +err_fin 142886.890625 +sparsity check 0.2999999906335558 +time 137.07 +36 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 205836.03125 +err_fin 175031.5625 +sparsity check 0.2999999672174454 +time 76.14 +36 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 84598.546875 +err_fin 73822.7421875 +sparsity check 0.2999997138977051 +time 1.37 +36 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 88803.203125 +err_fin 80859.484375 +sparsity check 0.2999997138977051 +time 1.34 +36 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 18330.46484375 +err_fin 11179.58984375 +sparsity check 0.2999999672174454 +time 68.94 +36 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2172691.0 +err_fin 1762100.875 +sparsity check 0.2999999906335558 +time 139.11 +36 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2152439.5 +err_fin 1739655.5 +sparsity check 0.2999999906335558 +time 139.44 +36 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 168783.984375 +err_fin 159445.0 +sparsity check 0.2999999906335558 +time 136.99 +37 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 341693.96875 +err_fin 289524.4375 +sparsity check 0.2999999672174454 +time 76.10 +37 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137424.4375 +err_fin 120404.546875 +sparsity check 0.2999997138977051 +time 1.36 +37 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 113853.453125 +err_fin 103941.421875 +sparsity check 0.2999997138977051 +time 1.33 +37 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30907.140625 +err_fin 17678.953125 +sparsity check 0.2999999672174454 +time 68.96 +37 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2275916.5 +err_fin 1835961.0 +sparsity check 0.2999999906335558 +time 139.10 +37 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2257321.25 +err_fin 1813333.0 +sparsity check 0.2999999906335558 +time 139.47 +37 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 185594.75 +err_fin 175083.84375 +sparsity check 0.2999999906335558 +time 137.00 +38 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 383625.125 +err_fin 322323.53125 +sparsity check 0.2999999672174454 +time 76.01 +38 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 151267.09375 +err_fin 130291.4375 +sparsity check 0.2999997138977051 +time 1.34 +38 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 120968.46875 +err_fin 107977.6875 +sparsity check 0.2999997138977051 +time 1.34 +38 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 49039.015625 +err_fin 28431.6796875 +sparsity check 0.2999999672174454 +time 68.88 +38 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2348146.5 +err_fin 1889757.5 +sparsity check 0.2999999906335558 +time 138.87 +38 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2332156.0 +err_fin 1869847.375 +sparsity check 0.2999999906335558 +time 138.94 +38 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 209499.734375 +err_fin 197284.65625 +sparsity check 0.2999999906335558 +time 136.75 +39 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 472518.0 +err_fin 396747.0 +sparsity check 0.2999999672174454 +time 75.95 +39 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 196093.890625 +err_fin 170486.25 +sparsity check 0.2999997138977051 +time 1.35 +39 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 131558.921875 +err_fin 117586.1328125 +sparsity check 0.2999997138977051 +time 1.34 +39 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 58529.93359375 +err_fin 33396.3984375 +sparsity check 0.2999999672174454 +time 68.86 +39 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2372547.5 +err_fin 1890493.375 +sparsity check 0.2999999906335558 +time 138.83 +39 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2361087.75 +err_fin 1874025.75 +sparsity check 0.2999999906335558 +time 139.10 +39 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 255438.25 +err_fin 237492.125 +sparsity check 0.2999999906335558 +time 136.81 +40 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 439511.4375 +err_fin 365144.625 +sparsity check 0.2999999672174454 +time 75.98 +40 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 170366.28125 +err_fin 147419.84375 +sparsity check 0.2999997138977051 +time 1.34 +40 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 150494.75 +err_fin 131893.71875 +sparsity check 0.2999997138977051 +time 1.34 +40 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 83030.8125 +err_fin 54382.9609375 +sparsity check 0.2999999672174454 +time 68.87 +40 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2463948.5 +err_fin 1940839.25 +sparsity check 0.2999999906335558 +time 138.85 +40 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2448904.25 +err_fin 1920500.5 +sparsity check 0.2999999906335558 +time 139.23 +40 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 289980.5 +err_fin 270139.5625 +sparsity check 0.2999999906335558 +time 136.79 +41 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 354734.1875 +err_fin 291964.53125 +sparsity check 0.2999999672174454 +time 76.02 +41 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 134578.46875 +err_fin 115245.9453125 +sparsity check 0.2999997138977051 +time 1.34 +41 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 130844.421875 +err_fin 115337.03125 +sparsity check 0.2999997138977051 +time 1.34 +41 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 88723.109375 +err_fin 51516.10546875 +sparsity check 0.2999999672174454 +time 68.86 +41 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2564022.0 +err_fin 1986360.125 +sparsity check 0.2999999906335558 +time 138.83 +41 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2507331.75 +err_fin 1930532.125 +sparsity check 0.2999999906335558 +time 139.11 +41 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 338828.03125 +err_fin 312503.9375 +sparsity check 0.2999999906335558 +time 136.91 +42 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 411178.53125 +err_fin 335910.5625 +sparsity check 0.2999999672174454 +time 75.96 +42 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 146400.75 +err_fin 125877.1875 +sparsity check 0.2999997138977051 +time 1.35 +42 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 161474.5625 +err_fin 143672.21875 +sparsity check 0.2999997138977051 +time 1.33 +42 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 82672.375 +err_fin 50431.01171875 +sparsity check 0.2999999672174454 +time 68.84 +42 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2767744.0 +err_fin 2130180.75 +sparsity check 0.2999999906335558 +time 138.80 +42 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2647581.5 +err_fin 2025464.0 +sparsity check 0.2999999906335558 +time 139.09 +42 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 366250.5 +err_fin 337305.78125 +sparsity check 0.2999999906335558 +time 136.68 +43 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 280567.90625 +err_fin 228823.125 +sparsity check 0.2999999672174454 +time 75.96 +43 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 120347.125 +err_fin 102759.8125 +sparsity check 0.2999997138977051 +time 1.35 +43 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 126751.6484375 +err_fin 109898.375 +sparsity check 0.2999997138977051 +time 1.33 +43 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 58906.57421875 +err_fin 38343.58203125 +sparsity check 0.2999999672174454 +time 68.79 +43 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2876848.5 +err_fin 2203414.5 +sparsity check 0.2999999906335558 +time 138.79 +43 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2726980.0 +err_fin 2075451.25 +sparsity check 0.2999999906335558 +time 139.08 +43 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 401723.9375 +err_fin 369312.71875 +sparsity check 0.2999999906335558 +time 136.86 +44 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 508804.59375 +err_fin 415737.625 +sparsity check 0.2999999672174454 +time 75.98 +44 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 150069.484375 +err_fin 130828.453125 +sparsity check 0.2999997138977051 +time 1.35 +44 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 193368.78125 +err_fin 174290.75 +sparsity check 0.2999997138977051 +time 1.33 +44 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 145043.71875 +err_fin 95371.828125 +sparsity check 0.2999999672174454 +time 68.91 +44 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3146891.5 +err_fin 2387051.0 +sparsity check 0.2999999906335558 +time 138.84 +44 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2884849.5 +err_fin 2171657.5 +sparsity check 0.2999999906335558 +time 139.12 +44 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 464940.0625 +err_fin 420746.96875 +sparsity check 0.2999999906335558 +time 136.88 +45 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 317623.25 +err_fin 255841.640625 +sparsity check 0.2999999672174454 +time 75.96 +45 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 126769.59375 +err_fin 108456.75 +sparsity check 0.2999997138977051 +time 1.36 +45 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 187572.5625 +err_fin 168927.65625 +sparsity check 0.2999997138977051 +time 1.34 +45 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 47674.6171875 +err_fin 32155.158203125 +sparsity check 0.2999999672174454 +time 68.86 +45 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3405428.5 +err_fin 2587359.5 +sparsity check 0.2999999906335558 +time 138.85 +45 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3085100.25 +err_fin 2325033.5 +sparsity check 0.2999999906335558 +time 138.80 +45 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 475345.46875 +err_fin 431596.9375 +sparsity check 0.2999999906335558 +time 136.38 +46 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 263395.71875 +err_fin 213005.28125 +sparsity check 0.2999999672174454 +time 75.96 +46 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 99574.15625 +err_fin 86753.2578125 +sparsity check 0.2999997138977051 +time 1.35 +46 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 162039.59375 +err_fin 146753.984375 +sparsity check 0.2999997138977051 +time 1.34 +46 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 63940.453125 +err_fin 44551.078125 +sparsity check 0.2999999672174454 +time 68.87 +46 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3655060.0 +err_fin 2791309.25 +sparsity check 0.2999999906335558 +time 138.78 +46 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3261432.0 +err_fin 2472352.5 +sparsity check 0.2999999906335558 +time 139.14 +46 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 475824.5 +err_fin 435338.125 +sparsity check 0.2999999906335558 +time 136.65 +47 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 364249.84375 +err_fin 295973.9375 +sparsity check 0.2999999672174454 +time 75.92 +47 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 146091.640625 +err_fin 126103.03125 +sparsity check 0.2999997138977051 +time 1.35 +47 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 160440.15625 +err_fin 143679.96875 +sparsity check 0.2999997138977051 +time 1.34 +47 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 98694.78125 +err_fin 55647.3125 +sparsity check 0.2999999672174454 +time 68.85 +47 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3775736.75 +err_fin 2854559.0 +sparsity check 0.2999999906335558 +time 138.80 +47 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3327351.25 +err_fin 2494366.0 +sparsity check 0.2999999906335558 +time 139.14 +47 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 534005.25 +err_fin 482482.3125 +sparsity check 0.2999999906335558 +time 136.75 +48 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 172792.40625 +err_fin 139728.265625 +sparsity check 0.2999999672174454 +time 75.99 +48 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57409.11328125 +err_fin 49361.9609375 +sparsity check 0.2999997138977051 +time 1.37 +48 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152449.59375 +err_fin 137785.90625 +sparsity check 0.2999997138977051 +time 1.33 +48 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 61970.984375 +err_fin 41327.55078125 +sparsity check 0.2999999672174454 +time 68.82 +48 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3948376.5 +err_fin 2967533.5 +sparsity check 0.2999999906335558 +time 138.81 +48 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3463526.5 +err_fin 2581677.0 +sparsity check 0.2999999906335558 +time 139.12 +48 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 531346.875 +err_fin 482351.875 +sparsity check 0.2999999906335558 +time 136.92 +49 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 92701.71875 +err_fin 74674.328125 +sparsity check 0.2999999672174454 +time 75.92 +49 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 39942.46875 +err_fin 34114.578125 +sparsity check 0.2999997138977051 +time 1.34 +49 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 124484.96875 +err_fin 110166.25 +sparsity check 0.2999997138977051 +time 1.33 +49 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 23617.455078125 +err_fin 14932.462890625 +sparsity check 0.2999999672174454 +time 68.82 +49 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4097844.75 +err_fin 3080826.75 +sparsity check 0.2999999906335558 +time 138.81 +49 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3592342.5 +err_fin 2677633.5 +sparsity check 0.2999999906335558 +time 139.21 +49 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 533097.8125 +err_fin 486073.15625 +sparsity check 0.2999999906335558 +time 136.77 +50 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 154342.25 +err_fin 124155.21875 +sparsity check 0.2999999672174454 +time 75.98 +50 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52400.37890625 +err_fin 45099.2734375 +sparsity check 0.2999997138977051 +time 1.33 +50 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 159195.15625 +err_fin 142175.875 +sparsity check 0.2999997138977051 +time 1.33 +50 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 51435.8125 +err_fin 35645.078125 +sparsity check 0.2999999672174454 +time 68.90 +50 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4253453.0 +err_fin 3199545.0 +sparsity check 0.2999999906335558 +time 138.84 +50 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3711681.5 +err_fin 2771113.5 +sparsity check 0.2999999906335558 +time 139.13 +50 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 534982.4375 +err_fin 488952.84375 +sparsity check 0.2999999906335558 +time 136.90 +51 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 203378.9375 +err_fin 163606.921875 +sparsity check 0.2999999672174454 +time 76.02 +51 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 75364.7109375 +err_fin 65284.08203125 +sparsity check 0.2999997138977051 +time 1.36 +51 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 164796.984375 +err_fin 149017.71875 +sparsity check 0.2999997138977051 +time 1.34 +51 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 46578.84765625 +err_fin 30409.25 +sparsity check 0.2999999672174454 +time 68.87 +51 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4372869.5 +err_fin 3297004.75 +sparsity check 0.2999999906335558 +time 138.84 +51 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3827711.5 +err_fin 2864346.0 +sparsity check 0.2999999906335558 +time 139.24 +51 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 538380.125 +err_fin 493029.0 +sparsity check 0.2999999906335558 +time 136.94 +52 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 343205.59375 +err_fin 277933.5 +sparsity check 0.2999999672174454 +time 75.94 +52 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 97406.1171875 +err_fin 85302.9453125 +sparsity check 0.2999997138977051 +time 1.35 +52 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 188485.796875 +err_fin 169885.046875 +sparsity check 0.2999997138977051 +time 1.34 +52 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 87558.890625 +err_fin 55424.01953125 +sparsity check 0.2999999672174454 +time 68.87 +52 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4470279.5 +err_fin 3383197.5 +sparsity check 0.2999999906335558 +time 138.96 +52 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3904438.5 +err_fin 2933288.5 +sparsity check 0.2999999906335558 +time 139.29 +52 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 545559.8125 +err_fin 500625.75 +sparsity check 0.2999999906335558 +time 136.99 +53 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 98386.859375 +err_fin 79794.640625 +sparsity check 0.2999999672174454 +time 75.96 +53 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 30940.599609375 +err_fin 26762.8828125 +sparsity check 0.2999997138977051 +time 1.35 +53 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 157410.75 +err_fin 141510.515625 +sparsity check 0.2999997138977051 +time 1.35 +53 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 35727.7890625 +err_fin 23910.728515625 +sparsity check 0.2999999672174454 +time 68.88 +53 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4587562.5 +err_fin 3464021.0 +sparsity check 0.2999999906335558 +time 138.88 +53 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4015500.5 +err_fin 3009185.75 +sparsity check 0.2999999906335558 +time 138.90 +53 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 551366.0 +err_fin 506246.21875 +sparsity check 0.2999999906335558 +time 136.82 +54 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 115892.796875 +err_fin 93850.4921875 +sparsity check 0.2999999672174454 +time 76.00 +54 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 36039.2734375 +err_fin 30834.34765625 +sparsity check 0.2999997138977051 +time 1.34 +54 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 162464.390625 +err_fin 146485.0625 +sparsity check 0.2999997138977051 +time 1.33 +54 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 44281.5078125 +err_fin 28517.87890625 +sparsity check 0.2999999672174454 +time 68.87 +54 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4700598.0 +err_fin 3554828.5 +sparsity check 0.2999999906335558 +time 138.84 +54 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4124973.0 +err_fin 3096894.5 +sparsity check 0.2999999906335558 +time 139.31 +54 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 551188.875 +err_fin 507271.125 +sparsity check 0.2999999906335558 +time 136.79 +55 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 192386.921875 +err_fin 156298.625 +sparsity check 0.2999999672174454 +time 76.08 +55 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68489.171875 +err_fin 59610.7265625 +sparsity check 0.2999997138977051 +time 1.35 +55 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 213937.28125 +err_fin 192016.8125 +sparsity check 0.2999997138977051 +time 1.33 +55 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 85003.484375 +err_fin 55887.3359375 +sparsity check 0.2999999672174454 +time 68.81 +55 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4787985.5 +err_fin 3627535.75 +sparsity check 0.2999999906335558 +time 138.80 +55 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4230457.0 +err_fin 3182817.75 +sparsity check 0.2999999906335558 +time 139.23 +55 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 562564.5625 +err_fin 519353.1875 +sparsity check 0.2999999906335558 +time 136.79 +56 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 145041.234375 +err_fin 117943.0859375 +sparsity check 0.2999999672174454 +time 76.03 +56 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 54334.1484375 +err_fin 46924.34375 +sparsity check 0.2999997138977051 +time 1.36 +56 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 150658.5625 +err_fin 135525.34375 +sparsity check 0.2999997138977051 +time 1.35 +56 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 42762.5 +err_fin 26602.42578125 +sparsity check 0.2999999672174454 +time 68.92 +56 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4849524.0 +err_fin 3669309.5 +sparsity check 0.2999999906335558 +time 138.91 +56 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4295199.0 +err_fin 3226766.5 +sparsity check 0.2999999906335558 +time 139.18 +56 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 568823.25 +err_fin 525306.875 +sparsity check 0.2999999906335558 +time 136.89 +57 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 126576.09375 +err_fin 102643.7109375 +sparsity check 0.2999999672174454 +time 75.94 +57 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 42347.52734375 +err_fin 37046.171875 +sparsity check 0.2999997138977051 +time 1.34 +57 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 191894.734375 +err_fin 173407.640625 +sparsity check 0.2999997138977051 +time 1.34 +57 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 45342.23046875 +err_fin 30498.8671875 +sparsity check 0.2999999672174454 +time 68.85 +57 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5016962.0 +err_fin 3800603.0 +sparsity check 0.2999999906335558 +time 138.85 +57 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4454324.5 +err_fin 3350025.5 +sparsity check 0.2999999906335558 +time 139.16 +57 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 579653.125 +err_fin 536391.1875 +sparsity check 0.2999999906335558 +time 136.78 +58 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66646.84375 +err_fin 54362.26171875 +sparsity check 0.2999999672174454 +time 75.95 +58 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 21354.5625 +err_fin 18121.306640625 +sparsity check 0.2999997138977051 +time 1.36 +58 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137431.15625 +err_fin 122708.6875 +sparsity check 0.2999997138977051 +time 1.34 +58 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 28380.96875 +err_fin 18636.51171875 +sparsity check 0.2999999672174454 +time 68.83 +58 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5121918.5 +err_fin 3885969.75 +sparsity check 0.2999999906335558 +time 138.87 +58 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4555366.0 +err_fin 3433220.0 +sparsity check 0.2999999906335558 +time 139.20 +58 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 582902.4375 +err_fin 540428.125 +sparsity check 0.2999999906335558 +time 137.03 +59 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 88575.640625 +err_fin 71788.8125 +sparsity check 0.2999999672174454 +time 75.94 +59 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 26823.66015625 +err_fin 23156.845703125 +sparsity check 0.2999997138977051 +time 1.35 +59 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152805.875 +err_fin 135758.5625 +sparsity check 0.2999997138977051 +time 1.34 +59 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 27197.44140625 +err_fin 17258.873046875 +sparsity check 0.2999999672174454 +time 68.86 +59 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5239635.0 +err_fin 3983008.75 +sparsity check 0.2999999906335558 +time 138.78 +59 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4665852.0 +err_fin 3524699.75 +sparsity check 0.2999999906335558 +time 139.09 +59 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 582928.3125 +err_fin 541999.0625 +sparsity check 0.2999999906335558 +time 136.90 +60 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 12798.548828125 +err_fin 10278.3193359375 +sparsity check 0.2999999672174454 +time 75.93 +60 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 3548.48876953125 +err_fin 2970.5234375 +sparsity check 0.2999997138977051 +time 1.35 +60 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 82546.890625 +err_fin 73781.4765625 +sparsity check 0.2999997138977051 +time 1.34 +60 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15324.7158203125 +err_fin 10076.595703125 +sparsity check 0.2999999672174454 +time 68.79 +60 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5312578.5 +err_fin 4051645.75 +sparsity check 0.2999999906335558 +time 138.96 +60 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4749171.0 +err_fin 3602157.5 +sparsity check 0.2999999906335558 +time 139.38 +60 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 578575.6875 +err_fin 539150.25 +sparsity check 0.2999999906335558 +time 136.96 +61 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 54320.1015625 +err_fin 44391.296875 +sparsity check 0.2999999672174454 +time 75.97 +61 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 16122.8203125 +err_fin 13871.373046875 +sparsity check 0.2999997138977051 +time 1.34 +61 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 140027.9375 +err_fin 126466.546875 +sparsity check 0.2999997138977051 +time 1.33 +61 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 24995.88671875 +err_fin 16653.05078125 +sparsity check 0.2999999672174454 +time 68.86 +61 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5411788.0 +err_fin 4130672.5 +sparsity check 0.2999999906335558 +time 139.06 +61 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4861670.0 +err_fin 3689423.0 +sparsity check 0.2999999906335558 +time 139.50 +61 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 580966.125 +err_fin 542406.75 +sparsity check 0.2999999906335558 +time 136.93 +62 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 57740.171875 +err_fin 47270.33203125 +sparsity check 0.2999999672174454 +time 76.05 +62 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 19181.212890625 +err_fin 16542.439453125 +sparsity check 0.2999997138977051 +time 1.34 +62 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 151797.3125 +err_fin 135003.84375 +sparsity check 0.2999997138977051 +time 1.35 +62 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 20518.494140625 +err_fin 12232.9052734375 +sparsity check 0.2999999672174454 +time 68.94 +62 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5527104.5 +err_fin 4233280.5 +sparsity check 0.2999999906335558 +time 139.10 +62 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4990665.0 +err_fin 3798818.5 +sparsity check 0.2999999906335558 +time 139.41 +62 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 587051.875 +err_fin 548654.25 +sparsity check 0.2999999906335558 +time 137.18 +63 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 53336.2421875 +err_fin 43936.7578125 +sparsity check 0.2999999672174454 +time 76.09 +63 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 14450.50390625 +err_fin 12411.6513671875 +sparsity check 0.2999997138977051 +time 1.36 +63 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 119009.359375 +err_fin 104587.875 +sparsity check 0.2999997138977051 +time 1.34 +63 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 18648.859375 +err_fin 12419.607421875 +sparsity check 0.2999999672174454 +time 68.98 +63 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5644198.0 +err_fin 4332246.0 +sparsity check 0.2999999906335558 +time 139.17 +63 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5100512.0 +err_fin 3895234.75 +sparsity check 0.2999999906335558 +time 139.48 +63 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 593314.5 +err_fin 555283.4375 +sparsity check 0.2999999906335558 +time 137.05 +64 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 102718.59375 +err_fin 84217.453125 +sparsity check 0.2999999672174454 +time 76.04 +64 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 33677.140625 +err_fin 28981.064453125 +sparsity check 0.2999997138977051 +time 1.36 +64 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 171384.203125 +err_fin 154471.34375 +sparsity check 0.2999997138977051 +time 1.35 +64 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 33770.703125 +err_fin 21257.568359375 +sparsity check 0.2999999672174454 +time 68.89 +64 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5753349.0 +err_fin 4422938.0 +sparsity check 0.2999999906335558 +time 138.94 +64 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5216544.0 +err_fin 3991321.0 +sparsity check 0.2999999906335558 +time 139.22 +64 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 602094.5 +err_fin 563713.875 +sparsity check 0.2999999906335558 +time 136.81 +65 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 26355.453125 +err_fin 21620.01953125 +sparsity check 0.2999999672174454 +time 75.99 +65 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 6515.88671875 +err_fin 5465.53125 +sparsity check 0.2999997138977051 +time 1.36 +65 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 94989.2734375 +err_fin 83817.53125 +sparsity check 0.2999997138977051 +time 1.33 +65 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14039.490234375 +err_fin 8782.720703125 +sparsity check 0.2999999672174454 +time 68.94 +65 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5849988.0 +err_fin 4509393.0 +sparsity check 0.2999999906335558 +time 138.85 +65 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5327747.0 +err_fin 4088452.5 +sparsity check 0.2999999906335558 +time 139.20 +65 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 611602.875 +err_fin 573357.125 +sparsity check 0.2999999906335558 +time 136.75 +66 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 44273.30859375 +err_fin 36516.1875 +sparsity check 0.2999999672174454 +time 75.97 +66 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 12960.013671875 +err_fin 10998.443359375 +sparsity check 0.2999997138977051 +time 1.37 +66 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 125497.7734375 +err_fin 112727.65625 +sparsity check 0.2999997138977051 +time 1.33 +66 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30598.025390625 +err_fin 21054.765625 +sparsity check 0.2999999672174454 +time 68.93 +66 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5992271.0 +err_fin 4617096.0 +sparsity check 0.2999999906335558 +time 138.77 +66 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5491715.5 +err_fin 4211823.0 +sparsity check 0.2999999906335558 +time 139.10 +66 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 640144.5625 +err_fin 599823.5 +sparsity check 0.2999999906335558 +time 136.91 +67 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 21747.685546875 +err_fin 17695.83984375 +sparsity check 0.2999999672174454 +time 75.93 +67 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 5541.3466796875 +err_fin 4561.103515625 +sparsity check 0.2999997138977051 +time 1.36 +67 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 51526.2890625 +err_fin 44337.6171875 +sparsity check 0.2999997138977051 +time 1.34 +67 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 9575.8984375 +err_fin 5245.2421875 +sparsity check 0.2999999672174454 +time 68.88 +67 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6040048.0 +err_fin 4659346.0 +sparsity check 0.2999999906335558 +time 138.87 +67 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5572547.5 +err_fin 4280505.5 +sparsity check 0.2999999906335558 +time 139.21 +67 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 641468.875 +err_fin 600853.75 +sparsity check 0.2999999906335558 +time 136.85 +68 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 206237.96875 +err_fin 169046.53125 +sparsity check 0.2999999672174454 +time 75.99 +68 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 74574.7734375 +err_fin 64934.2890625 +sparsity check 0.2999997138977051 +time 1.37 +68 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 244333.90625 +err_fin 221241.34375 +sparsity check 0.2999997138977051 +time 1.33 +68 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30313.84375 +err_fin 20368.40234375 +sparsity check 0.2999999672174454 +time 68.93 +68 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6263252.0 +err_fin 4839256.5 +sparsity check 0.2999999906335558 +time 138.87 +68 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5811772.0 +err_fin 4470888.5 +sparsity check 0.2999999906335558 +time 139.21 +68 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 687192.75 +err_fin 642274.625 +sparsity check 0.2999999906335558 +time 136.80 +69 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 366524.09375 +err_fin 299911.40625 +sparsity check 0.2999999672174454 +time 75.93 +69 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 163258.78125 +err_fin 142651.890625 +sparsity check 0.2999997138977051 +time 1.37 +69 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 325972.21875 +err_fin 290618.65625 +sparsity check 0.2999997138977051 +time 1.32 +69 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 51660.125 +err_fin 33025.34375 +sparsity check 0.2999999672174454 +time 68.86 +69 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6389854.0 +err_fin 4935137.0 +sparsity check 0.2999999906335558 +time 138.90 +69 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5991463.5 +err_fin 4607961.5 +sparsity check 0.2999999906335558 +time 139.25 +69 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 720987.375 +err_fin 674224.75 +sparsity check 0.2999999906335558 +time 136.86 +70 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 200002.9375 +err_fin 163869.09375 +sparsity check 0.2999999672174454 +time 75.96 +70 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 76289.9375 +err_fin 65971.640625 +sparsity check 0.2999997138977051 +time 1.35 +70 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 185622.34375 +err_fin 164882.9375 +sparsity check 0.2999997138977051 +time 1.33 +70 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 38191.6953125 +err_fin 22749.255859375 +sparsity check 0.2999999672174454 +time 68.84 +70 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6531525.0 +err_fin 5040001.0 +sparsity check 0.2999999906335558 +time 138.79 +70 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6153728.0 +err_fin 4732230.0 +sparsity check 0.2999999906335558 +time 139.11 +70 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 769950.0 +err_fin 719316.5625 +sparsity check 0.2999999906335558 +time 136.89 +71 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 284382.1875 +err_fin 232780.40625 +sparsity check 0.2999999672174454 +time 75.93 +71 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 123754.65625 +err_fin 107557.0625 +sparsity check 0.2999997138977051 +time 1.36 +71 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 198829.28125 +err_fin 178026.65625 +sparsity check 0.2999997138977051 +time 1.32 +71 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 50413.4140625 +err_fin 30423.818359375 +sparsity check 0.2999999672174454 +time 68.88 +71 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6734894.0 +err_fin 5184268.0 +sparsity check 0.2999999906335558 +time 138.86 +71 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6357098.0 +err_fin 4879386.0 +sparsity check 0.2999999906335558 +time 139.14 +71 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 811461.6875 +err_fin 756491.875 +sparsity check 0.2999999906335558 +time 136.68 +72 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 408341.9375 +err_fin 332715.6875 +sparsity check 0.2999999672174454 +time 75.90 +72 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 170334.28125 +err_fin 148576.5625 +sparsity check 0.2999997138977051 +time 1.37 +72 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 232841.8125 +err_fin 209264.15625 +sparsity check 0.2999997138977051 +time 1.34 +72 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66256.8125 +err_fin 44045.484375 +sparsity check 0.2999999672174454 +time 68.89 +72 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6898981.0 +err_fin 5295198.0 +sparsity check 0.2999999906335558 +time 138.82 +72 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6553801.0 +err_fin 5015821.0 +sparsity check 0.2999999906335558 +time 139.15 +72 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 872650.0625 +err_fin 812148.125 +sparsity check 0.2999999906335558 +time 136.76 +73 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 378989.0 +err_fin 307307.5 +sparsity check 0.2999999672174454 +time 75.96 +73 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 165181.125 +err_fin 143724.21875 +sparsity check 0.2999997138977051 +time 1.40 +73 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 264383.25 +err_fin 233683.65625 +sparsity check 0.2999997138977051 +time 1.33 +73 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 42146.91015625 +err_fin 28379.98828125 +sparsity check 0.2999999672174454 +time 68.90 +73 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7066649.0 +err_fin 5405444.0 +sparsity check 0.2999999906335558 +time 138.88 +73 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6746659.0 +err_fin 5147261.0 +sparsity check 0.2999999906335558 +time 139.23 +73 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 962808.6875 +err_fin 890883.9375 +sparsity check 0.2999999906335558 +time 136.70 +74 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 341475.875 +err_fin 275513.9375 +sparsity check 0.2999999672174454 +time 75.94 +74 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 141449.484375 +err_fin 120626.625 +sparsity check 0.2999997138977051 +time 1.35 +74 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 202775.5625 +err_fin 176957.40625 +sparsity check 0.2999997138977051 +time 1.34 +74 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 90303.7109375 +err_fin 52464.86328125 +sparsity check 0.2999999672174454 +time 68.85 +74 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7239382.0 +err_fin 5476796.0 +sparsity check 0.2999999906335558 +time 138.81 +74 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6935505.5 +err_fin 5232003.0 +sparsity check 0.2999999906335558 +time 139.13 +74 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1066884.75 +err_fin 980786.3125 +sparsity check 0.2999999906335558 +time 136.83 +75 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 350630.28125 +err_fin 279884.8125 +sparsity check 0.2999999672174454 +time 76.00 +75 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 138135.21875 +err_fin 116699.4453125 +sparsity check 0.2999997138977051 +time 1.35 +75 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 233201.03125 +err_fin 199713.890625 +sparsity check 0.2999997138977051 +time 1.34 +75 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 88134.2421875 +err_fin 51477.3515625 +sparsity check 0.2999999672174454 +time 68.91 +75 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7317242.0 +err_fin 5489530.0 +sparsity check 0.2999999906335558 +time 138.88 +75 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7001968.0 +err_fin 5241092.0 +sparsity check 0.2999999906335558 +time 138.91 +75 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1205966.0 +err_fin 1095852.5 +sparsity check 0.2999999906335558 +time 136.46 +76 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 533007.0 +err_fin 417794.25 +sparsity check 0.2999999672174454 +time 75.93 +76 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 203242.84375 +err_fin 168667.953125 +sparsity check 0.2999997138977051 +time 1.34 +76 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 371379.875 +err_fin 314949.1875 +sparsity check 0.2999997138977051 +time 1.34 +76 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 271257.5 +err_fin 169999.609375 +sparsity check 0.2999999672174454 +time 68.88 +76 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7275406.5 +err_fin 5352736.5 +sparsity check 0.2999999906335558 +time 138.82 +76 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6911096.0 +err_fin 5068489.0 +sparsity check 0.2999999906335558 +time 139.15 +76 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1480580.5 +err_fin 1310576.75 +sparsity check 0.2999999906335558 +time 136.81 +77 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 355251.8125 +err_fin 271370.0625 +sparsity check 0.2999999672174454 +time 75.95 +77 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 144754.375 +err_fin 117778.796875 +sparsity check 0.2999997138977051 +time 1.35 +77 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 251212.9375 +err_fin 208745.71875 +sparsity check 0.2999997138977051 +time 1.34 +77 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 147515.015625 +err_fin 74935.5703125 +sparsity check 0.2999999672174454 +time 68.82 +77 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6806460.0 +err_fin 4879777.0 +sparsity check 0.2999999906335558 +time 138.76 +77 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6467157.0 +err_fin 4620484.5 +sparsity check 0.2999999906335558 +time 138.78 +77 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1742981.375 +err_fin 1490763.75 +sparsity check 0.2999999906335558 +time 136.43 +78 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 319464.34375 +err_fin 234997.234375 +sparsity check 0.2999999672174454 +time 75.94 +78 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 141533.734375 +err_fin 110442.96875 +sparsity check 0.2999997138977051 +time 1.36 +78 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 219353.734375 +err_fin 182995.84375 +sparsity check 0.2999997138977051 +time 1.33 +78 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 89409.109375 +err_fin 47214.2734375 +sparsity check 0.2999999672174454 +time 68.88 +78 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5719522.0 +err_fin 3957060.5 +sparsity check 0.2999999906335558 +time 138.79 +78 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5377522.0 +err_fin 3702593.0 +sparsity check 0.2999999906335558 +time 139.14 +78 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1843521.25 +err_fin 1475283.75 +sparsity check 0.2999999906335558 +time 136.86 +79 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 204026.3125 +err_fin 139487.578125 +sparsity check 0.2999999672174454 +time 75.98 +79 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 111417.5390625 +err_fin 84433.484375 +sparsity check 0.2999997138977051 +time 1.35 +79 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 98769.3203125 +err_fin 77037.796875 +sparsity check 0.2999997138977051 +time 1.35 +79 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 25067.11328125 +err_fin 9912.486328125 +sparsity check 0.2999999672174454 +time 68.86 +79 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3504688.5 +err_fin 2242649.5 +sparsity check 0.2999999906335558 +time 138.88 +79 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3212513.0 +err_fin 2050016.75 +sparsity check 0.2999999906335558 +time 139.19 +79 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1486937.5 +err_fin 1014967.25 +sparsity check 0.2999999906335558 +time 136.74 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0139) +model.layers.0.self_attn.k_proj.weight tensor(0.0296) +model.layers.0.self_attn.v_proj.weight tensor(0.0791) +model.layers.0.self_attn.o_proj.weight tensor(4.0084e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0185) +51398.38483381271 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 4.561863 diff --git a/logs/llama2-70-0.7-fix-mask b/logs/llama2-70-0.7-fix-mask new file mode 100644 index 0000000..b499385 --- /dev/null +++ b/logs/llama2-70-0.7-fix-mask @@ -0,0 +1,4020 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +model.layers.0.self_attn.q_proj.weight torch.Size([8192, 8192]) (8192, 8192) 0.1 +model.layers.0.self_attn.k_proj.weight torch.Size([1024, 8192]) (1024, 8192) 0.2 +model.layers.0.mlp.gate_proj.weight torch.Size([28672, 8192]) (8192, 28672) 0.2 +Ready. +0 self_attn.q_proj +Pruning ... +0.28259551525115967 0.08259549736976624 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1331873331981834 0.9709505944546686 1.0 +err_prefin 146.79672241210938 +err_fin 42.25421905517578 +sparsity check 0.28259551525115967 +time 75.06 +0 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 143.55474853515625 +err_fin 39.225311279296875 +sparsity check 0.29999983310699463 +time 1.33 +0 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 18.346843719482422 +err_fin 9.327608108520508 +sparsity check 0.29999983310699463 +time 1.33 +0 self_attn.o_proj +Pruning ... +0.2628120183944702 0.06281200051307678 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0604359525883797 0.9709505944546686 1.0 +err_prefin 7.877456188201904 +err_fin 0.5982409715652466 +sparsity check 0.2628120183944702 +time 67.91 +0 mlp.gate_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 2311.3427734375 +err_fin 819.235107421875 +sparsity check 0.29999276995658875 +time 137.85 +0 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 2341.20654296875 +err_fin 820.5517578125 +sparsity check 0.29999276995658875 +time 138.12 +0 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 51.23821258544922 +err_fin 22.18744659423828 +sparsity check 0.29999999489103046 +time 135.63 +1 self_attn.q_proj +Pruning ... +0.2640542834997177 0.06405426561832428 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0652610300661904 0.9709505944546686 1.0 +err_prefin 2363.85888671875 +err_fin 517.98046875 +sparsity check 0.2640542834997177 +time 75.10 +1 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 2169.40087890625 +err_fin 510.3336181640625 +sparsity check 0.29999983310699463 +time 1.31 +1 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 127.56815338134766 +err_fin 58.54473114013672 +sparsity check 0.29999983310699463 +time 1.30 +1 self_attn.o_proj +Pruning ... +0.2670059949159622 0.06700597703456879 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0765780080386531 0.9709505944546686 1.0 +err_prefin 146.55723571777344 +err_fin 31.407405853271484 +sparsity check 0.2670059949159622 +time 67.95 +1 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 19178.1171875 +err_fin 6323.96240234375 +sparsity check 0.29999999489103046 +time 137.86 +1 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 21308.6640625 +err_fin 6805.1181640625 +sparsity check 0.29999999489103046 +time 138.07 +1 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 732.9593505859375 +err_fin 566.9931640625 +sparsity check 0.29999999489103046 +time 135.70 +2 self_attn.q_proj +Pruning ... +0.2932608723640442 0.09326085448265076 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1691896024446529 0.9709505944546686 1.0 +err_prefin 7611.6748046875 +err_fin 2654.669921875 +sparsity check 0.2932608723640442 +time 75.02 +2 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 9215.1044921875 +err_fin 3673.41015625 +sparsity check 0.29999983310699463 +time 1.34 +2 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 577.298095703125 +err_fin 340.20489501953125 +sparsity check 0.29999983310699463 +time 1.32 +2 self_attn.o_proj +Pruning ... +0.2901856452226639 0.09018562734127045 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1590172442785198 0.9709505944546686 1.0 +err_prefin 695.6298217773438 +err_fin 261.84814453125 +sparsity check 0.2901856452226639 +time 67.89 +2 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 71745.296875 +err_fin 31993.83984375 +sparsity check 0.29999999489103046 +time 137.81 +2 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 74531.984375 +err_fin 32673.53125 +sparsity check 0.29999999489103046 +time 138.22 +2 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2582.0546875 +err_fin 2160.287109375 +sparsity check 0.29999999489103046 +time 135.61 +3 self_attn.q_proj +Pruning ... +0.29991354048252106 0.09991352260112762 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1906495374166257 0.9709505944546686 1.0 +err_prefin 62766.71875 +err_fin 31050.1875 +sparsity check 0.29991354048252106 +time 74.96 +3 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 40575.09375 +err_fin 23052.455078125 +sparsity check 0.29999983310699463 +time 1.32 +3 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 7380.71875 +err_fin 4846.990234375 +sparsity check 0.29999983310699463 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.29522277414798737 0.09522275626659393 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1755946479997674 0.9709505944546686 1.0 +err_prefin 1127.72119140625 +err_fin 557.7116088867188 +sparsity check 0.29522277414798737 +time 67.83 +3 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 151899.5625 +err_fin 81245.1875 +sparsity check 0.29999999489103046 +time 137.76 +3 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 151964.3125 +err_fin 80370.4921875 +sparsity check 0.29999999489103046 +time 138.06 +3 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3375.807861328125 +err_fin 2967.864990234375 +sparsity check 0.29999999489103046 +time 135.80 +4 self_attn.q_proj +Pruning ... +0.29966770112514496 0.09966768324375153 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1898694190435064 0.9709505944546686 1.0 +err_prefin 116636.96875 +err_fin 62684.6953125 +sparsity check 0.29966770112514496 +time 74.93 +4 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 53774.46875 +err_fin 30973.330078125 +sparsity check 0.29999983310699463 +time 1.34 +4 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 11458.361328125 +err_fin 8175.4697265625 +sparsity check 0.29999983310699463 +time 1.31 +4 self_attn.o_proj +Pruning ... +0.29358045756816864 0.09358043968677521 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1702374010967032 0.9709505944546686 1.0 +err_prefin 1469.5662841796875 +err_fin 767.6857299804688 +sparsity check 0.29358045756816864 +time 67.85 +4 mlp.gate_proj +Pruning ... +0.2999720743724278 0.1999022513628006 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059687168141702 0.9709505944546686 1.0 +err_prefin 241601.171875 +err_fin 143953.28125 +sparsity check 0.2999720743724278 +time 137.78 +4 mlp.up_proj +Pruning ... +0.2999230538095747 0.19973067939281464 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0058705945342121 0.9709505944546686 1.0 +err_prefin 236393.453125 +err_fin 139779.84375 +sparsity check 0.2999230538095747 +time 138.19 +4 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5088.666015625 +err_fin 4538.8662109375 +sparsity check 0.29999999489103046 +time 135.71 +5 self_attn.q_proj +Pruning ... +0.2999517172574997 0.09995169937610626 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1907705961829542 0.9709505944546686 1.0 +err_prefin 182847.59375 +err_fin 107919.828125 +sparsity check 0.2999517172574997 +time 74.95 +5 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 78792.828125 +err_fin 51433.125 +sparsity check 0.29999983310699463 +time 1.33 +5 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 17187.236328125 +err_fin 12823.0703125 +sparsity check 0.29999983310699463 +time 1.31 +5 self_attn.o_proj +Pruning ... +0.2964719831943512 0.09647196531295776 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1796392943690934 0.9709505944546686 1.0 +err_prefin 2450.074462890625 +err_fin 1195.0565185546875 +sparsity check 0.2964719831943512 +time 67.84 +5 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 298847.28125 +err_fin 187423.8125 +sparsity check 0.29999999489103046 +time 137.77 +5 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 291472.53125 +err_fin 181482.3125 +sparsity check 0.29999999489103046 +time 138.06 +5 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7176.54296875 +err_fin 6329.83203125 +sparsity check 0.29999999489103046 +time 135.79 +6 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 206867.84375 +err_fin 128255.5703125 +sparsity check 0.29998789727687836 +time 74.92 +6 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 83216.7734375 +err_fin 57516.95703125 +sparsity check 0.29999983310699463 +time 1.34 +6 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 19365.34765625 +err_fin 15202.09375 +sparsity check 0.29999983310699463 +time 1.33 +6 self_attn.o_proj +Pruning ... +0.2905711680650711 0.09057115018367767 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1603015048610001 0.9709505944546686 1.0 +err_prefin 4286.0869140625 +err_fin 2491.453125 +sparsity check 0.2905711680650711 +time 67.86 +6 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 395216.5 +err_fin 255924.375 +sparsity check 0.29999999489103046 +time 137.76 +6 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 382376.84375 +err_fin 246421.4375 +sparsity check 0.29999999489103046 +time 138.20 +6 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10638.2509765625 +err_fin 9528.076171875 +sparsity check 0.29999999489103046 +time 135.73 +7 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 341452.1875 +err_fin 224390.28125 +sparsity check 0.29998789727687836 +time 74.94 +7 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 114694.46875 +err_fin 84191.5 +sparsity check 0.29999983310699463 +time 1.32 +7 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 27540.373046875 +err_fin 22415.71484375 +sparsity check 0.29999983310699463 +time 1.31 +7 self_attn.o_proj +Pruning ... +0.2873151898384094 0.08731517195701599 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1493722758084872 0.9709505944546686 1.0 +err_prefin 5619.8203125 +err_fin 3063.083251953125 +sparsity check 0.2873151898384094 +time 67.88 +7 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 486262.3125 +err_fin 324045.4375 +sparsity check 0.29999999489103046 +time 137.77 +7 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 468986.375 +err_fin 311134.46875 +sparsity check 0.29999999489103046 +time 138.03 +7 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 15176.83203125 +err_fin 13571.203125 +sparsity check 0.29999999489103046 +time 135.77 +8 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 300449.0 +err_fin 200402.9375 +sparsity check 0.29998789727687836 +time 74.93 +8 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 113408.46875 +err_fin 79877.375 +sparsity check 0.29999983310699463 +time 1.33 +8 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 25838.47265625 +err_fin 21134.3828125 +sparsity check 0.29999983310699463 +time 1.32 +8 self_attn.o_proj +Pruning ... +0.29325757920742035 0.09325756132602692 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1691787963723583 0.9709505944546686 1.0 +err_prefin 9909.5068359375 +err_fin 5343.287109375 +sparsity check 0.29325757920742035 +time 67.88 +8 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 664751.875 +err_fin 459721.625 +sparsity check 0.29999999489103046 +time 137.75 +8 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 628046.375 +err_fin 432942.5625 +sparsity check 0.29999999489103046 +time 138.19 +8 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 28026.904296875 +err_fin 25225.08203125 +sparsity check 0.29999999489103046 +time 135.61 +9 self_attn.q_proj +Pruning ... +0.29997682571411133 0.0999768078327179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908502024971612 0.9709505944546686 1.0 +err_prefin 321847.90625 +err_fin 231912.484375 +sparsity check 0.29997682571411133 +time 74.95 +9 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 106476.0 +err_fin 83721.1953125 +sparsity check 0.29999983310699463 +time 1.34 +9 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 37827.9921875 +err_fin 32805.93359375 +sparsity check 0.29999983310699463 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.28864505887031555 0.08864504098892212 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1538590273766385 0.9709505944546686 1.0 +err_prefin 5372.61865234375 +err_fin 2617.85302734375 +sparsity check 0.28864505887031555 +time 67.85 +9 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 839131.25 +err_fin 594295.25 +sparsity check 0.29999999489103046 +time 137.71 +9 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 788290.375 +err_fin 556090.75 +sparsity check 0.29999999489103046 +time 138.07 +9 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 26872.58203125 +err_fin 24459.40625 +sparsity check 0.29999999489103046 +time 135.75 +10 self_attn.q_proj +Pruning ... +0.29997682571411133 0.0999768078327179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908502024971612 0.9709505944546686 1.0 +err_prefin 196492.09375 +err_fin 141329.734375 +sparsity check 0.29997682571411133 +time 74.94 +10 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 73369.4375 +err_fin 54307.40234375 +sparsity check 0.29999983310699463 +time 1.34 +10 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 37013.75 +err_fin 31610.794921875 +sparsity check 0.29999983310699463 +time 1.31 +10 self_attn.o_proj +Pruning ... +0.2763001024723053 0.07630008459091187 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1109343374362552 0.9709505944546686 1.0 +err_prefin 3928.4443359375 +err_fin 2009.77587890625 +sparsity check 0.2763001024723053 +time 67.87 +10 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1003815.6875 +err_fin 740524.0 +sparsity check 0.29999999489103046 +time 137.74 +10 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 939789.625 +err_fin 691014.8125 +sparsity check 0.29999999489103046 +time 138.18 +10 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 29326.943359375 +err_fin 26999.7421875 +sparsity check 0.29999999489103046 +time 135.68 +11 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 316608.9375 +err_fin 226435.78125 +sparsity check 0.29998879134655 +time 74.92 +11 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 122440.640625 +err_fin 89522.34375 +sparsity check 0.29999983310699463 +time 1.34 +11 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 44243.2734375 +err_fin 37031.53515625 +sparsity check 0.29999983310699463 +time 1.31 +11 self_attn.o_proj +Pruning ... +0.2790084034204483 0.07900838553905487 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1206036320433004 0.9709505944546686 1.0 +err_prefin 6518.07275390625 +err_fin 3281.354248046875 +sparsity check 0.2790084034204483 +time 67.88 +11 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1071501.5 +err_fin 798371.5 +sparsity check 0.29999999489103046 +time 137.76 +11 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1009022.8125 +err_fin 749514.125 +sparsity check 0.29999999489103046 +time 138.03 +11 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 32609.2109375 +err_fin 30018.953125 +sparsity check 0.29999999489103046 +time 135.73 +12 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 261794.4375 +err_fin 187452.65625 +sparsity check 0.29998879134655 +time 74.92 +12 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 109502.671875 +err_fin 80291.7421875 +sparsity check 0.29999983310699463 +time 1.33 +12 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 33745.0625 +err_fin 27522.99609375 +sparsity check 0.29999983310699463 +time 1.31 +12 self_attn.o_proj +Pruning ... +0.2816329002380371 0.08163288235664368 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1298349114545376 0.9709505944546686 1.0 +err_prefin 7192.89404296875 +err_fin 3595.00830078125 +sparsity check 0.2816329002380371 +time 67.88 +12 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1063842.5 +err_fin 794042.6875 +sparsity check 0.29999999489103046 +time 137.76 +12 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1013963.625 +err_fin 754538.5 +sparsity check 0.29999999489103046 +time 138.17 +12 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 36493.3984375 +err_fin 33421.99609375 +sparsity check 0.29999999489103046 +time 135.71 +13 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 352666.5 +err_fin 255618.4375 +sparsity check 0.29998879134655 +time 74.92 +13 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 123216.078125 +err_fin 92612.046875 +sparsity check 0.29999983310699463 +time 1.34 +13 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 44567.5234375 +err_fin 37710.37109375 +sparsity check 0.29999983310699463 +time 1.31 +13 self_attn.o_proj +Pruning ... +0.2752116918563843 0.07521167397499084 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1070064196849327 0.9709505944546686 1.0 +err_prefin 18506.728515625 +err_fin 10081.5458984375 +sparsity check 0.2752116918563843 +time 67.85 +13 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1141047.5 +err_fin 837738.125 +sparsity check 0.29999999489103046 +time 137.78 +13 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1097610.125 +err_fin 803344.0 +sparsity check 0.29999999489103046 +time 138.05 +13 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 48132.16015625 +err_fin 44068.17578125 +sparsity check 0.29999999489103046 +time 135.78 +14 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 577450.4375 +err_fin 423265.3125 +sparsity check 0.29998879134655 +time 74.95 +14 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 184470.28125 +err_fin 141238.21875 +sparsity check 0.29999983310699463 +time 1.34 +14 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 63341.81640625 +err_fin 54127.0234375 +sparsity check 0.29999983310699463 +time 1.31 +14 self_attn.o_proj +Pruning ... +0.2884829193353653 0.08848290145397186 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.153313695407239 0.9709505944546686 1.0 +err_prefin 23199.708984375 +err_fin 12293.0634765625 +sparsity check 0.2884829193353653 +time 67.84 +14 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1312165.5 +err_fin 990560.625 +sparsity check 0.29999999489103046 +time 137.75 +14 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1261362.0 +err_fin 949626.125 +sparsity check 0.29999999489103046 +time 138.17 +14 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 50792.34765625 +err_fin 46890.79296875 +sparsity check 0.29999999489103046 +time 135.65 +15 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 424490.4375 +err_fin 318170.1875 +sparsity check 0.29998879134655 +time 74.94 +15 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 135404.0625 +err_fin 105356.4765625 +sparsity check 0.29999983310699463 +time 1.34 +15 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 73779.875 +err_fin 64663.421875 +sparsity check 0.29999983310699463 +time 1.31 +15 self_attn.o_proj +Pruning ... +0.2687942236661911 0.06879420578479767 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.08333579718327 0.9709505944546686 1.0 +err_prefin 19743.36328125 +err_fin 11142.53125 +sparsity check 0.2687942236661911 +time 67.89 +15 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1412708.625 +err_fin 1071747.0 +sparsity check 0.29999999489103046 +time 137.77 +15 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1364112.125 +err_fin 1031497.375 +sparsity check 0.29999999489103046 +time 138.03 +15 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 58260.37890625 +err_fin 53685.796875 +sparsity check 0.29999999489103046 +time 135.68 +16 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 410965.8125 +err_fin 311465.84375 +sparsity check 0.29998879134655 +time 74.94 +16 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 150358.703125 +err_fin 116443.578125 +sparsity check 0.29999983310699463 +time 1.32 +16 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 68976.875 +err_fin 60407.609375 +sparsity check 0.29999983310699463 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.2813430279493332 0.08134301006793976 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1288219119178806 0.9709505944546686 1.0 +err_prefin 17627.57421875 +err_fin 9628.939453125 +sparsity check 0.2813430279493332 +time 67.87 +16 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1482370.375 +err_fin 1138280.75 +sparsity check 0.29999999489103046 +time 137.75 +16 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1436231.375 +err_fin 1100404.25 +sparsity check 0.29999999489103046 +time 138.16 +16 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 58589.7890625 +err_fin 54233.8203125 +sparsity check 0.29999999489103046 +time 135.72 +17 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 526197.0 +err_fin 392983.5 +sparsity check 0.29998879134655 +time 74.91 +17 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 180334.875 +err_fin 137717.9375 +sparsity check 0.29999983310699463 +time 1.31 +17 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 62122.34765625 +err_fin 52657.75 +sparsity check 0.29999983310699463 +time 1.31 +17 self_attn.o_proj +Pruning ... +0.29516535997390747 0.09516534209251404 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1754081285736342 0.9709505944546686 1.0 +err_prefin 27624.837890625 +err_fin 13451.22265625 +sparsity check 0.29516535997390747 +time 67.87 +17 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1475431.25 +err_fin 1111972.75 +sparsity check 0.29999999489103046 +time 137.77 +17 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1433489.5 +err_fin 1076994.5 +sparsity check 0.29999999489103046 +time 137.99 +17 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 74916.5 +err_fin 67816.53125 +sparsity check 0.29999999489103046 +time 135.75 +18 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 502272.0 +err_fin 378369.78125 +sparsity check 0.29998879134655 +time 74.92 +18 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 164821.15625 +err_fin 128557.6875 +sparsity check 0.29999983310699463 +time 1.32 +18 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 91514.578125 +err_fin 81140.8828125 +sparsity check 0.29999983310699463 +time 1.31 +18 self_attn.o_proj +Pruning ... +0.2797379046678543 0.07973788678646088 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1231830746767248 0.9709505944546686 1.0 +err_prefin 28444.716796875 +err_fin 14717.9296875 +sparsity check 0.2797379046678543 +time 67.88 +18 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1614599.25 +err_fin 1230537.0 +sparsity check 0.29999999489103046 +time 137.76 +18 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1558061.25 +err_fin 1183164.75 +sparsity check 0.29999999489103046 +time 138.18 +18 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 78702.421875 +err_fin 71932.4375 +sparsity check 0.29999999489103046 +time 135.78 +19 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 576192.0 +err_fin 444806.375 +sparsity check 0.29998879134655 +time 74.91 +19 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 175761.90625 +err_fin 142242.375 +sparsity check 0.29999983310699463 +time 1.33 +19 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 106367.53125 +err_fin 96257.84375 +sparsity check 0.29999983310699463 +time 1.31 +19 self_attn.o_proj +Pruning ... +0.26657745242118835 0.06657743453979492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0749476349473879 0.9709505944546686 1.0 +err_prefin 22884.625 +err_fin 12479.001953125 +sparsity check 0.26657745242118835 +time 67.94 +19 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1743903.5 +err_fin 1336926.75 +sparsity check 0.29999999489103046 +time 137.75 +19 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1683411.375 +err_fin 1287939.625 +sparsity check 0.29999999489103046 +time 138.03 +19 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 80935.0 +err_fin 74760.453125 +sparsity check 0.29999999489103046 +time 135.74 +20 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 510591.09375 +err_fin 396992.5 +sparsity check 0.29998879134655 +time 74.94 +20 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 158409.21875 +err_fin 126875.9765625 +sparsity check 0.29999983310699463 +time 1.32 +20 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 86279.90625 +err_fin 78110.109375 +sparsity check 0.29999983310699463 +time 1.30 +20 self_attn.o_proj +Pruning ... +0.2718873471021652 0.07188732922077179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0948557784689537 0.9709505944546686 1.0 +err_prefin 19584.56640625 +err_fin 9591.591796875 +sparsity check 0.2718873471021652 +time 67.88 +20 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1852608.875 +err_fin 1424023.625 +sparsity check 0.29999288490840365 +time 137.77 +20 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1789557.75 +err_fin 1372617.5 +sparsity check 0.29999288490840365 +time 138.16 +20 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 81356.515625 +err_fin 75384.609375 +sparsity check 0.29999999489103046 +time 135.72 +21 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 327392.375 +err_fin 251870.3125 +sparsity check 0.29998879134655 +time 74.97 +21 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 113291.296875 +err_fin 87867.2421875 +sparsity check 0.29999983310699463 +time 1.34 +21 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 75309.59375 +err_fin 66831.5078125 +sparsity check 0.29999983310699463 +time 1.32 +21 self_attn.o_proj +Pruning ... +0.26312975585460663 0.0631297379732132 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0616736476882211 0.9709505944546686 1.0 +err_prefin 24898.46484375 +err_fin 13261.9765625 +sparsity check 0.26312975585460663 +time 67.88 +21 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1920540.25 +err_fin 1477476.5 +sparsity check 0.29999288490840365 +time 137.77 +21 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1864768.75 +err_fin 1432040.25 +sparsity check 0.29999999489103046 +time 138.08 +21 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 84583.421875 +err_fin 78796.125 +sparsity check 0.29999999489103046 +time 135.67 +22 self_attn.q_proj +Pruning ... +0.29997682571411133 0.0999768078327179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908502024971612 0.9709505944546686 1.0 +err_prefin 303901.75 +err_fin 236533.03125 +sparsity check 0.29997682571411133 +time 74.94 +22 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 120275.90625 +err_fin 97766.234375 +sparsity check 0.29999983310699463 +time 1.35 +22 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 83501.3203125 +err_fin 75249.4765625 +sparsity check 0.29999983310699463 +time 1.31 +22 self_attn.o_proj +Pruning ... +0.27621839940547943 0.076218381524086 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.110640328608706 0.9709505944546686 1.0 +err_prefin 16023.5283203125 +err_fin 8324.244140625 +sparsity check 0.27621839940547943 +time 67.91 +22 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2001608.125 +err_fin 1537090.25 +sparsity check 0.29999288490840365 +time 137.77 +22 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1948519.75 +err_fin 1492786.75 +sparsity check 0.29999288490840365 +time 138.18 +22 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 86168.171875 +err_fin 80430.5234375 +sparsity check 0.29999999489103046 +time 135.58 +23 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 440584.78125 +err_fin 337327.71875 +sparsity check 0.29998879134655 +time 75.13 +23 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 167621.515625 +err_fin 133256.59375 +sparsity check 0.29999983310699463 +time 1.33 +23 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 89806.90625 +err_fin 80290.171875 +sparsity check 0.29999983310699463 +time 1.31 +23 self_attn.o_proj +Pruning ... +0.2784363329410553 0.07843631505966187 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1185734717779658 0.9709505944546686 1.0 +err_prefin 24210.25390625 +err_fin 12262.890625 +sparsity check 0.2784363329410553 +time 68.08 +23 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2035726.5 +err_fin 1565690.5 +sparsity check 0.29999288490840365 +time 138.21 +23 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 1986447.5 +err_fin 1523848.25 +sparsity check 0.29999288490840365 +time 138.51 +23 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 88675.1953125 +err_fin 82281.703125 +sparsity check 0.29999999489103046 +time 136.19 +24 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 391728.25 +err_fin 294734.8125 +sparsity check 0.29998879134655 +time 75.07 +24 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 149414.109375 +err_fin 115625.390625 +sparsity check 0.29999983310699463 +time 1.32 +24 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 69658.3125 +err_fin 59851.4921875 +sparsity check 0.29999983310699463 +time 1.31 +24 self_attn.o_proj +Pruning ... +0.2838824987411499 0.08388248085975647 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1376418772496466 0.9709505944546686 1.0 +err_prefin 20319.00390625 +err_fin 11150.955078125 +sparsity check 0.2838824987411499 +time 68.02 +24 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1986341.25 +err_fin 1530140.25 +sparsity check 0.29999999489103046 +time 138.15 +24 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1955876.25 +err_fin 1504181.625 +sparsity check 0.29999999489103046 +time 138.49 +24 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 90355.4375 +err_fin 83795.28125 +sparsity check 0.29999999489103046 +time 135.96 +25 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 463368.28125 +err_fin 349878.0 +sparsity check 0.29998879134655 +time 75.09 +25 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 173160.0625 +err_fin 133917.875 +sparsity check 0.29999983310699463 +time 1.33 +25 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 73108.625 +err_fin 62942.953125 +sparsity check 0.29999983310699463 +time 1.31 +25 self_attn.o_proj +Pruning ... +0.27712246775627136 0.07712244987487793 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1138860353396238 0.9709505944546686 1.0 +err_prefin 51899.3515625 +err_fin 28122.509765625 +sparsity check 0.27712246775627136 +time 68.04 +25 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1935228.25 +err_fin 1446518.875 +sparsity check 0.29999999489103046 +time 138.16 +25 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1924336.25 +err_fin 1433656.25 +sparsity check 0.29999999489103046 +time 138.43 +25 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 130152.5 +err_fin 118920.578125 +sparsity check 0.29999999489103046 +time 136.08 +26 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 668758.875 +err_fin 509217.65625 +sparsity check 0.29998879134655 +time 75.10 +26 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 225106.59375 +err_fin 179373.0625 +sparsity check 0.29999983310699463 +time 1.34 +26 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 113069.515625 +err_fin 97972.1328125 +sparsity check 0.29999983310699463 +time 1.30 +26 self_attn.o_proj +Pruning ... +0.28410089015960693 0.0841008722782135 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1383946818247657 0.9709505944546686 1.0 +err_prefin 61961.640625 +err_fin 33807.609375 +sparsity check 0.28410089015960693 +time 68.04 +26 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2194201.0 +err_fin 1659860.75 +sparsity check 0.29999288490840365 +time 138.14 +26 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2169127.25 +err_fin 1636336.5 +sparsity check 0.29999288490840365 +time 138.55 +26 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 127526.078125 +err_fin 119189.890625 +sparsity check 0.29999999489103046 +time 136.01 +27 self_attn.q_proj +Pruning ... +0.29997682571411133 0.0999768078327179 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908502024971612 0.9709505944546686 1.0 +err_prefin 308544.71875 +err_fin 238041.40625 +sparsity check 0.29997682571411133 +time 75.09 +27 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 127479.53125 +err_fin 101545.5078125 +sparsity check 0.29999983310699463 +time 1.34 +27 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 91094.3125 +err_fin 82046.3125 +sparsity check 0.29999983310699463 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.28035055100917816 0.08035053312778473 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1253412433754744 0.9709505944546686 1.0 +err_prefin 25478.0859375 +err_fin 12188.7265625 +sparsity check 0.28035055100917816 +time 68.03 +27 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2319278.0 +err_fin 1761144.875 +sparsity check 0.29999288490840365 +time 138.14 +27 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2286411.0 +err_fin 1731594.875 +sparsity check 0.29999288490840365 +time 138.43 +27 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 134304.78125 +err_fin 125860.28125 +sparsity check 0.29999999489103046 +time 136.03 +28 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 556808.125 +err_fin 429353.5625 +sparsity check 0.29998879134655 +time 75.08 +28 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 198106.21875 +err_fin 163720.28125 +sparsity check 0.29999983310699463 +time 1.35 +28 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 141281.0625 +err_fin 126776.921875 +sparsity check 0.29999983310699463 +time 1.32 +28 self_attn.o_proj +Pruning ... +0.27586427330970764 0.07586425542831421 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1093644214291734 0.9709505944546686 1.0 +err_prefin 44421.6015625 +err_fin 24812.359375 +sparsity check 0.27586427330970764 +time 68.04 +28 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2449181.25 +err_fin 1867147.25 +sparsity check 0.29999288490840365 +time 138.11 +28 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2413979.75 +err_fin 1834742.75 +sparsity check 0.29999288490840365 +time 138.53 +28 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 143911.40625 +err_fin 134980.046875 +sparsity check 0.29999999489103046 +time 136.01 +29 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 556052.3125 +err_fin 429065.28125 +sparsity check 0.29998879134655 +time 75.08 +29 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 190485.8125 +err_fin 154461.734375 +sparsity check 0.29999983310699463 +time 1.36 +29 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 150277.90625 +err_fin 134620.03125 +sparsity check 0.29999983310699463 +time 1.33 +29 self_attn.o_proj +Pruning ... +0.2676489055156708 0.06764888763427734 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0790159956155794 0.9709505944546686 1.0 +err_prefin 41584.5546875 +err_fin 25433.015625 +sparsity check 0.2676489055156708 +time 68.04 +29 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2596596.75 +err_fin 1990537.25 +sparsity check 0.29999288490840365 +time 138.13 +29 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2551580.75 +err_fin 1950915.0 +sparsity check 0.29999288490840365 +time 138.39 +29 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 151312.84375 +err_fin 142841.296875 +sparsity check 0.29999999489103046 +time 136.02 +30 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 284128.6875 +err_fin 221573.90625 +sparsity check 0.29998879134655 +time 75.09 +30 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 97484.125 +err_fin 79525.3671875 +sparsity check 0.29999983310699463 +time 1.34 +30 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 133623.0625 +err_fin 122049.625 +sparsity check 0.29999983310699463 +time 1.32 +30 self_attn.o_proj +Pruning ... +0.2699955552816391 0.06999553740024567 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0878351277825802 0.9709505944546686 1.0 +err_prefin 19892.140625 +err_fin 11753.15625 +sparsity check 0.2699955552816391 +time 68.05 +30 mlp.gate_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2744453.0 +err_fin 2119522.25 +sparsity check 0.29999288490840365 +time 138.11 +30 mlp.up_proj +Pruning ... +0.29999288490840365 0.19997508823871613 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060103493959285 0.9709505944546686 1.0 +err_prefin 2685503.0 +err_fin 2068581.5 +sparsity check 0.29999288490840365 +time 138.54 +30 mlp.down_proj +Pruning ... +0.2999999906335558 0.19999997317790985 0.242857141154153 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245631764015 0.9709505944546686 1.0 +err_prefin 155463.21875 +err_fin 146958.765625 +sparsity check 0.2999999906335558 +time 136.01 +31 self_attn.q_proj +Pruning ... +0.2999767065048218 0.09997668862342834 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908498245682169 0.9709505944546686 1.0 +err_prefin 586435.4375 +err_fin 456473.5 +sparsity check 0.2999767065048218 +time 75.10 +31 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 190119.671875 +err_fin 157454.9375 +sparsity check 0.29999983310699463 +time 1.33 +31 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 151328.828125 +err_fin 133361.0625 +sparsity check 0.29999983310699463 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.26509250700473785 0.06509248912334442 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.069265072491063 0.9709505944546686 1.0 +err_prefin 47542.30859375 +err_fin 28039.8203125 +sparsity check 0.26509250700473785 +time 68.04 +31 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 2856960.5 +err_fin 2226228.75 +sparsity check 0.29998569403375897 +time 138.13 +31 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 2789057.25 +err_fin 2168888.5 +sparsity check 0.29998569403375897 +time 138.42 +31 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 156633.078125 +err_fin 148351.5 +sparsity check 0.29999999489103046 +time 136.09 +32 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 645674.375 +err_fin 505763.3125 +sparsity check 0.29998879134655 +time 75.09 +32 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 200602.46875 +err_fin 168077.53125 +sparsity check 0.29999983310699463 +time 1.32 +32 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 132714.8125 +err_fin 120198.78125 +sparsity check 0.29999983310699463 +time 1.33 +32 self_attn.o_proj +Pruning ... +0.2810487300157547 0.08104871213436127 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1277917867336793 0.9709505944546686 1.0 +err_prefin 31878.662109375 +err_fin 17792.8984375 +sparsity check 0.2810487300157547 +time 68.04 +32 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2937329.5 +err_fin 2285798.75 +sparsity check 0.29999999489103046 +time 138.12 +32 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2874004.5 +err_fin 2231448.0 +sparsity check 0.29999999489103046 +time 138.53 +32 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 167017.1875 +err_fin 157694.75 +sparsity check 0.29999999489103046 +time 136.01 +33 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 921647.1875 +err_fin 715591.625 +sparsity check 0.29998879134655 +time 75.09 +33 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 260694.234375 +err_fin 211985.234375 +sparsity check 0.29999983310699463 +time 1.34 +33 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 152820.75 +err_fin 135232.0625 +sparsity check 0.29999983310699463 +time 1.33 +33 self_attn.o_proj +Pruning ... +0.27306830883026123 0.0730682909488678 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0991991034389423 0.9709505944546686 1.0 +err_prefin 76522.5625 +err_fin 41888.80078125 +sparsity check 0.27306830883026123 +time 68.02 +33 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2978824.0 +err_fin 2269977.0 +sparsity check 0.29999999489103046 +time 138.14 +33 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 2927499.0 +err_fin 2225089.0 +sparsity check 0.29998569403375897 +time 138.38 +33 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 193353.75 +err_fin 181045.84375 +sparsity check 0.29999999489103046 +time 136.04 +34 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 344702.53125 +err_fin 260525.96875 +sparsity check 0.29998879134655 +time 75.08 +34 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 135257.96875 +err_fin 103439.6875 +sparsity check 0.29999983310699463 +time 1.33 +34 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 92529.46875 +err_fin 79457.9375 +sparsity check 0.29999983310699463 +time 1.33 +34 self_attn.o_proj +Pruning ... +0.2805570214986801 0.08055700361728668 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1260669232643261 0.9709505944546686 1.0 +err_prefin 44281.296875 +err_fin 23647.220703125 +sparsity check 0.2805570214986801 +time 68.05 +34 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3085606.5 +err_fin 2370264.5 +sparsity check 0.29999999489103046 +time 138.09 +34 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3029326.0 +err_fin 2321283.5 +sparsity check 0.29999999489103046 +time 138.51 +34 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 210513.625 +err_fin 198454.890625 +sparsity check 0.29999999489103046 +time 136.04 +35 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 556998.375 +err_fin 429104.375 +sparsity check 0.29998879134655 +time 75.08 +35 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 172997.640625 +err_fin 135932.15625 +sparsity check 0.29999983310699463 +time 1.32 +35 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 124336.2734375 +err_fin 109908.96875 +sparsity check 0.29999983310699463 +time 1.32 +35 self_attn.o_proj +Pruning ... +0.26645298302173615 0.06645296514034271 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0744732981503389 0.9709505944546686 1.0 +err_prefin 56546.22265625 +err_fin 28763.7421875 +sparsity check 0.26645298302173615 +time 68.06 +35 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3277307.0 +err_fin 2514377.0 +sparsity check 0.29998569403375897 +time 138.10 +35 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3213722.0 +err_fin 2459371.5 +sparsity check 0.29998569403375897 +time 138.40 +35 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 225026.625 +err_fin 212251.28125 +sparsity check 0.29999999489103046 +time 136.07 +36 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 535441.875 +err_fin 411742.625 +sparsity check 0.29998879134655 +time 75.07 +36 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 167991.1875 +err_fin 133753.921875 +sparsity check 0.29999983310699463 +time 1.35 +36 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 118896.40625 +err_fin 105938.703125 +sparsity check 0.29999983310699463 +time 1.32 +36 self_attn.o_proj +Pruning ... +0.27251090109348297 0.07251088321208954 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0971528059664624 0.9709505944546686 1.0 +err_prefin 40434.15234375 +err_fin 20990.693359375 +sparsity check 0.27251090109348297 +time 68.04 +36 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3363074.75 +err_fin 2574906.75 +sparsity check 0.29998569403375897 +time 138.12 +36 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3297823.5 +err_fin 2520323.5 +sparsity check 0.29998569403375897 +time 138.51 +36 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 251172.375 +err_fin 236097.359375 +sparsity check 0.29999999489103046 +time 136.04 +37 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 850409.75 +err_fin 653630.625 +sparsity check 0.29998879134655 +time 75.11 +37 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 238505.765625 +err_fin 191663.96875 +sparsity check 0.29999983310699463 +time 1.33 +37 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 154456.4375 +err_fin 137949.3125 +sparsity check 0.29999983310699463 +time 1.31 +37 self_attn.o_proj +Pruning ... +0.27937693893909454 0.07937692105770111 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1219080495563976 0.9709505944546686 1.0 +err_prefin 73303.3359375 +err_fin 35917.9765625 +sparsity check 0.27937693893909454 +time 68.02 +37 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3538460.0 +err_fin 2698741.0 +sparsity check 0.29998569403375897 +time 138.13 +37 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3471317.0 +err_fin 2641933.0 +sparsity check 0.29998569403375897 +time 138.40 +37 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 276686.9375 +err_fin 259926.734375 +sparsity check 0.29999999489103046 +time 136.05 +38 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 954035.625 +err_fin 723900.5 +sparsity check 0.29998879134655 +time 75.10 +38 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 264393.375 +err_fin 206396.0 +sparsity check 0.29999983310699463 +time 1.34 +38 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 168857.5 +err_fin 146324.8125 +sparsity check 0.29999983310699463 +time 1.30 +38 self_attn.o_proj +Pruning ... +0.28340384364128113 0.0834038257598877 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1359887977706427 0.9709505944546686 1.0 +err_prefin 137350.9375 +err_fin 67391.84375 +sparsity check 0.28340384364128113 +time 68.01 +38 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 3666757.5 +err_fin 2786624.0 +sparsity check 0.2999928040163858 +time 138.11 +38 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 3605587.5 +err_fin 2732781.5 +sparsity check 0.2999928040163858 +time 138.52 +38 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 314325.625 +err_fin 294774.9375 +sparsity check 0.29999999489103046 +time 136.01 +39 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 1158776.625 +err_fin 881585.875 +sparsity check 0.29998879134655 +time 75.09 +39 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 324029.9375 +err_fin 263872.8125 +sparsity check 0.29999983310699463 +time 1.33 +39 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 184303.484375 +err_fin 160006.0625 +sparsity check 0.29999983310699463 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.289878711104393 0.08987869322299957 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1579929118461283 0.9709505944546686 1.0 +err_prefin 147869.84375 +err_fin 72714.1953125 +sparsity check 0.289878711104393 +time 68.02 +39 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3731458.0 +err_fin 2807082.0 +sparsity check 0.29999999489103046 +time 138.11 +39 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3675048.5 +err_fin 2759898.25 +sparsity check 0.29999999489103046 +time 138.39 +39 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 381719.96875 +err_fin 353688.625 +sparsity check 0.29999999489103046 +time 136.06 +40 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 1080544.125 +err_fin 814920.75 +sparsity check 0.29999998211860657 +time 74.94 +40 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 283680.875 +err_fin 229947.625 +sparsity check 0.29999983310699463 +time 1.33 +40 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 210013.09375 +err_fin 180032.53125 +sparsity check 0.29999983310699463 +time 1.31 +40 self_attn.o_proj +Pruning ... +0.28795187175273895 0.08795185387134552 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1515243001537314 0.9709505944546686 1.0 +err_prefin 196848.125 +err_fin 111018.8984375 +sparsity check 0.28795187175273895 +time 67.94 +40 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 3907574.0 +err_fin 2907535.5 +sparsity check 0.2999928040163858 +time 137.83 +40 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 3837530.0 +err_fin 2850688.0 +sparsity check 0.2999928040163858 +time 138.23 +40 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 432315.1875 +err_fin 402098.21875 +sparsity check 0.29999999489103046 +time 135.73 +41 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 907988.75 +err_fin 672588.875 +sparsity check 0.29999998211860657 +time 74.94 +41 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 247021.3125 +err_fin 195653.671875 +sparsity check 0.29999983310699463 +time 1.35 +41 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 182044.78125 +err_fin 156797.625 +sparsity check 0.29999983310699463 +time 1.31 +41 self_attn.o_proj +Pruning ... +0.28459352254867554 0.0845935046672821 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1400895297749696 0.9709505944546686 1.0 +err_prefin 198913.25 +err_fin 98080.21875 +sparsity check 0.28459352254867554 +time 67.90 +41 mlp.gate_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 4106484.5 +err_fin 3000634.5 +sparsity check 0.29998569403375897 +time 137.83 +41 mlp.up_proj +Pruning ... +0.29998569403375897 0.19994992017745972 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059959652152723 0.9709505944546686 1.0 +err_prefin 3958873.0 +err_fin 2883165.0 +sparsity check 0.29998569403375897 +time 138.12 +41 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 497553.34375 +err_fin 460157.5625 +sparsity check 0.29999999489103046 +time 135.83 +42 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 1032662.0625 +err_fin 763616.0 +sparsity check 0.29999998211860657 +time 74.92 +42 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 254862.5 +err_fin 202071.84375 +sparsity check 0.29999983310699463 +time 1.32 +42 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 222320.03125 +err_fin 193921.90625 +sparsity check 0.29999983310699463 +time 1.33 +42 self_attn.o_proj +Pruning ... +0.2862556427717209 0.08625562489032745 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1457746488308302 0.9709505944546686 1.0 +err_prefin 187395.953125 +err_fin 97517.015625 +sparsity check 0.2862556427717209 +time 67.89 +42 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 4441402.0 +err_fin 3230147.5 +sparsity check 0.2999928040163858 +time 137.85 +42 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 4174835.5 +err_fin 3027989.0 +sparsity check 0.2999928040163858 +time 138.24 +42 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 527780.8125 +err_fin 488658.875 +sparsity check 0.29999999489103046 +time 135.76 +43 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 718667.75 +err_fin 530635.625 +sparsity check 0.29999998211860657 +time 74.98 +43 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 224979.0 +err_fin 175419.859375 +sparsity check 0.29999983310699463 +time 1.33 +43 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 181082.515625 +err_fin 152621.78125 +sparsity check 0.29999983310699463 +time 1.33 +43 self_attn.o_proj +Pruning ... +0.2754656672477722 0.07546564936637878 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1079251663770824 0.9709505944546686 1.0 +err_prefin 137791.5625 +err_fin 76316.640625 +sparsity check 0.2754656672477722 +time 67.88 +43 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 4625762.0 +err_fin 3350013.25 +sparsity check 0.29999999489103046 +time 137.81 +43 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 4295103.0 +err_fin 3105253.75 +sparsity check 0.29999999489103046 +time 138.11 +43 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 566437.5 +err_fin 524485.25 +sparsity check 0.29999999489103046 +time 135.82 +44 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 1223547.5 +err_fin 913486.75 +sparsity check 0.29999998211860657 +time 74.92 +44 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 249873.28125 +err_fin 202698.40625 +sparsity check 0.29999983310699463 +time 1.33 +44 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 269607.875 +err_fin 237971.28125 +sparsity check 0.29999983310699463 +time 1.31 +44 self_attn.o_proj +Pruning ... +0.2958831340074539 0.09588311612606049 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1777359749883376 0.9709505944546686 1.0 +err_prefin 359362.0 +err_fin 208121.984375 +sparsity check 0.2958831340074539 +time 67.87 +44 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 5070381.0 +err_fin 3646746.5 +sparsity check 0.2999928040163858 +time 137.76 +44 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 4542354.5 +err_fin 3257371.0 +sparsity check 0.2999928040163858 +time 138.19 +44 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 639691.75 +err_fin 585614.625 +sparsity check 0.29999999489103046 +time 135.71 +45 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 804759.8125 +err_fin 592195.125 +sparsity check 0.29998879134655 +time 74.91 +45 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 232416.375 +err_fin 185826.71875 +sparsity check 0.29999983310699463 +time 1.33 +45 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 245836.375 +err_fin 219475.59375 +sparsity check 0.29999983310699463 +time 1.32 +45 self_attn.o_proj +Pruning ... +0.28903020918369293 0.0890301913022995 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1551525418871482 0.9709505944546686 1.0 +err_prefin 91797.03125 +err_fin 53978.2421875 +sparsity check 0.28903020918369293 +time 67.85 +45 mlp.gate_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 5468754.5 +err_fin 3953727.5 +sparsity check 0.2999928040163858 +time 137.84 +45 mlp.up_proj +Pruning ... +0.2999858260154724 0.19995038211345673 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0059962292380278 0.9709505944546686 1.0 +err_prefin 4828201.0 +err_fin 3482285.25 +sparsity check 0.2999858260154724 +time 138.10 +45 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 641580.75 +err_fin 590428.5 +sparsity check 0.29999999489103046 +time 135.82 +46 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 664021.5 +err_fin 493468.5 +sparsity check 0.29998879134655 +time 74.93 +46 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 183038.40625 +err_fin 150695.421875 +sparsity check 0.29999983310699463 +time 1.33 +46 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 224350.125 +err_fin 199914.984375 +sparsity check 0.29999983310699463 +time 1.33 +46 self_attn.o_proj +Pruning ... +0.27695709466934204 0.07695707678794861 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1132935707778988 0.9709505944546686 1.0 +err_prefin 146060.875 +err_fin 87849.25 +sparsity check 0.27695709466934204 +time 67.90 +46 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5835415.5 +err_fin 4269036.5 +sparsity check 0.29999999489103046 +time 137.83 +46 mlp.up_proj +Pruning ... +0.2999928040163858 0.19997480511665344 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101875936183 0.9709505944546686 1.0 +err_prefin 5071733.5 +err_fin 3702408.5 +sparsity check 0.2999928040163858 +time 138.26 +46 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 639784.0 +err_fin 593456.375 +sparsity check 0.29999999489103046 +time 135.66 +47 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 919380.0 +err_fin 682234.1875 +sparsity check 0.29999998211860657 +time 74.93 +47 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 263575.78125 +err_fin 212870.53125 +sparsity check 0.29999983310699463 +time 1.33 +47 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 207595.1875 +err_fin 184903.96875 +sparsity check 0.29999983310699463 +time 1.32 +47 self_attn.o_proj +Pruning ... +0.29235348105430603 0.0923534631729126 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1662050953881062 0.9709505944546686 1.0 +err_prefin 219744.5 +err_fin 105917.7890625 +sparsity check 0.29235348105430603 +time 67.89 +47 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6036848.5 +err_fin 4374001.0 +sparsity check 0.29999999489103046 +time 137.82 +47 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5174902.5 +err_fin 3741000.0 +sparsity check 0.29999999489103046 +time 138.02 +47 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 716123.625 +err_fin 657202.25 +sparsity check 0.29999999489103046 +time 135.77 +48 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 465647.5 +err_fin 344852.84375 +sparsity check 0.29998879134655 +time 74.94 +48 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 124884.1875 +err_fin 100681.1484375 +sparsity check 0.29999983310699463 +time 1.33 +48 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 205058.0 +err_fin 184559.46875 +sparsity check 0.29999983310699463 +time 1.31 +48 self_attn.o_proj +Pruning ... +0.276949867606163 0.07694984972476959 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1132676664885035 0.9709505944546686 1.0 +err_prefin 133447.375 +err_fin 79014.34375 +sparsity check 0.276949867606163 +time 67.88 +48 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6292413.0 +err_fin 4523682.5 +sparsity check 0.29999999489103046 +time 137.80 +48 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5364602.5 +err_fin 3850003.5 +sparsity check 0.29999999489103046 +time 138.22 +48 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 709427.125 +err_fin 652958.375 +sparsity check 0.29999999489103046 +time 135.74 +49 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 270561.71875 +err_fin 199012.203125 +sparsity check 0.29998879134655 +time 74.94 +49 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 102232.015625 +err_fin 81923.0 +sparsity check 0.29999983310699463 +time 1.34 +49 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 164399.0625 +err_fin 144228.734375 +sparsity check 0.29999983310699463 +time 1.30 +49 self_attn.o_proj +Pruning ... +0.2970356047153473 0.09703558683395386 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1814556951776876 0.9709505944546686 1.0 +err_prefin 45655.078125 +err_fin 25116.39453125 +sparsity check 0.2970356047153473 +time 67.83 +49 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6509316.0 +err_fin 4686928.0 +sparsity check 0.29999999489103046 +time 137.89 +49 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5540761.5 +err_fin 3985135.25 +sparsity check 0.29999999489103046 +time 138.16 +49 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 713351.625 +err_fin 659329.75 +sparsity check 0.29999999489103046 +time 135.90 +50 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 404233.15625 +err_fin 298731.5625 +sparsity check 0.29998879134655 +time 74.92 +50 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 113135.2421875 +err_fin 91920.625 +sparsity check 0.29999983310699463 +time 1.34 +50 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 211036.875 +err_fin 186140.515625 +sparsity check 0.29999983310699463 +time 1.31 +50 self_attn.o_proj +Pruning ... +0.2647864520549774 0.06478643417358398 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0680874055562972 0.9709505944546686 1.0 +err_prefin 101116.8828125 +err_fin 63962.734375 +sparsity check 0.2647864520549774 +time 67.88 +50 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6731093.5 +err_fin 4858090.5 +sparsity check 0.29999999489103046 +time 137.83 +50 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5718403.0 +err_fin 4119040.25 +sparsity check 0.29999999489103046 +time 138.23 +50 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 717476.125 +err_fin 664461.75 +sparsity check 0.29999999489103046 +time 135.78 +51 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 523168.25 +err_fin 385879.96875 +sparsity check 0.29998879134655 +time 74.94 +51 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 152884.21875 +err_fin 124647.375 +sparsity check 0.29999983310699463 +time 1.31 +51 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 219848.25 +err_fin 195624.703125 +sparsity check 0.29999983310699463 +time 1.32 +51 self_attn.o_proj +Pruning ... +0.26690903306007385 0.06690901517868042 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0762094923420098 0.9709505944546686 1.0 +err_prefin 110817.078125 +err_fin 62859.36328125 +sparsity check 0.26690903306007385 +time 67.88 +51 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6899877.0 +err_fin 5000642.0 +sparsity check 0.29999999489103046 +time 137.77 +51 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5879860.5 +err_fin 4254378.0 +sparsity check 0.29999999489103046 +time 138.03 +51 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 725606.75 +err_fin 673433.75 +sparsity check 0.29999999489103046 +time 135.80 +52 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 841074.1875 +err_fin 626901.625 +sparsity check 0.29998879134655 +time 74.94 +52 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 178144.6875 +err_fin 148127.53125 +sparsity check 0.29999983310699463 +time 1.33 +52 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 261927.0 +err_fin 233192.109375 +sparsity check 0.29999983310699463 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.2869969606399536 0.08699694275856018 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1482938937343352 0.9709505944546686 1.0 +err_prefin 246878.921875 +err_fin 135332.0 +sparsity check 0.2869969606399536 +time 67.87 +52 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7014948.0 +err_fin 5115490.5 +sparsity check 0.29999999489103046 +time 137.76 +52 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5968485.0 +err_fin 4344353.5 +sparsity check 0.29999999489103046 +time 138.18 +52 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 731476.75 +err_fin 680262.75 +sparsity check 0.29999999489103046 +time 135.72 +53 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 281445.96875 +err_fin 210045.75 +sparsity check 0.29998879134655 +time 74.94 +53 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 79969.4296875 +err_fin 65543.421875 +sparsity check 0.29999983310699463 +time 1.32 +53 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 201788.796875 +err_fin 181084.875 +sparsity check 0.29999983310699463 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.27482394874095917 0.07482393085956573 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1056011912180657 0.9709505944546686 1.0 +err_prefin 58068.44921875 +err_fin 35820.52734375 +sparsity check 0.27482394874095917 +time 67.90 +53 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7190984.5 +err_fin 5225492.0 +sparsity check 0.29999999489103046 +time 137.78 +53 mlp.up_proj +Pruning ... +0.2999930168901171 0.19997555017471313 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060106133887283 0.9709505944546686 1.0 +err_prefin 6130041.0 +err_fin 4447478.0 +sparsity check 0.2999930168901171 +time 138.07 +53 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 743852.6875 +err_fin 691739.75 +sparsity check 0.29999999489103046 +time 135.77 +54 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 329570.0625 +err_fin 244857.578125 +sparsity check 0.29998879134655 +time 74.92 +54 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 90746.875 +err_fin 73247.140625 +sparsity check 0.2999997138977051 +time 1.34 +54 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 209753.625 +err_fin 187811.0625 +sparsity check 0.29999983310699463 +time 1.31 +54 self_attn.o_proj +Pruning ... +0.2679605334997177 0.06796051561832428 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0801943159570713 0.9709505944546686 1.0 +err_prefin 84472.953125 +err_fin 50208.2265625 +sparsity check 0.2679605334997177 +time 67.88 +54 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7337865.0 +err_fin 5350986.0 +sparsity check 0.29999999489103046 +time 137.79 +54 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6279685.0 +err_fin 4569357.5 +sparsity check 0.29999999489103046 +time 138.18 +54 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 748287.25 +err_fin 697182.0625 +sparsity check 0.29999999489103046 +time 135.71 +55 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 525178.1875 +err_fin 394429.0625 +sparsity check 0.29998879134655 +time 74.93 +55 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 146428.703125 +err_fin 121194.65625 +sparsity check 0.29999983310699463 +time 1.34 +55 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 271352.25 +err_fin 244146.5625 +sparsity check 0.29999983310699463 +time 1.31 +55 self_attn.o_proj +Pruning ... +0.2537457197904587 0.05374570190906525 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0240283496937537 0.9709505944546686 1.0 +err_prefin 138155.59375 +err_fin 88221.515625 +sparsity check 0.2537457197904587 +time 67.87 +55 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7445884.0 +err_fin 5445626.5 +sparsity check 0.29999999489103046 +time 137.86 +55 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 6423024.0 +err_fin 4690568.5 +sparsity check 0.29999276995658875 +time 138.13 +55 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 762954.875 +err_fin 712664.8125 +sparsity check 0.29999999489103046 +time 135.84 +56 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 383869.125 +err_fin 287241.75 +sparsity check 0.29998879134655 +time 74.92 +56 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 117954.28125 +err_fin 95604.90625 +sparsity check 0.29999983310699463 +time 1.36 +56 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 192813.5625 +err_fin 171541.8125 +sparsity check 0.29999983310699463 +time 1.32 +56 self_attn.o_proj +Pruning ... +0.2789105176925659 0.07891049981117249 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1202567170885263 0.9709505944546686 1.0 +err_prefin 71235.9375 +err_fin 40052.0546875 +sparsity check 0.2789105176925659 +time 67.89 +56 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7519281.0 +err_fin 5496486.5 +sparsity check 0.29999999489103046 +time 137.82 +56 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 6514655.0 +err_fin 4749463.0 +sparsity check 0.29999276995658875 +time 138.22 +56 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 774782.625 +err_fin 723765.0625 +sparsity check 0.29999999489103046 +time 135.74 +57 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 364376.03125 +err_fin 273175.5 +sparsity check 0.29998879134655 +time 74.93 +57 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 105951.109375 +err_fin 88166.7890625 +sparsity check 0.29999983310699463 +time 1.32 +57 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 245037.0625 +err_fin 220778.203125 +sparsity check 0.29999983310699463 +time 1.30 +57 self_attn.o_proj +Pruning ... +0.2683565765619278 0.06835655868053436 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.0816886340616965 0.9709505944546686 1.0 +err_prefin 79624.2109375 +err_fin 50351.03515625 +sparsity check 0.2683565765619278 +time 67.86 +57 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7758609.0 +err_fin 5682120.0 +sparsity check 0.29999999489103046 +time 137.79 +57 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6733990.0 +err_fin 4919042.0 +sparsity check 0.29999999489103046 +time 138.09 +57 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 794832.6875 +err_fin 743152.625 +sparsity check 0.29999999489103046 +time 135.81 +58 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 205269.765625 +err_fin 153413.3125 +sparsity check 0.29998789727687836 +time 74.96 +58 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 65268.65625 +err_fin 51892.671875 +sparsity check 0.29999983310699463 +time 1.34 +58 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 176897.375 +err_fin 157272.359375 +sparsity check 0.29999983310699463 +time 1.31 +58 self_attn.o_proj +Pruning ... +0.292335569858551 0.09233555197715759 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1661460413382314 0.9709505944546686 1.0 +err_prefin 52765.92578125 +err_fin 30778.25 +sparsity check 0.292335569858551 +time 67.83 +58 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7901502.0 +err_fin 5800598.0 +sparsity check 0.29999999489103046 +time 137.85 +58 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 6873436.0 +err_fin 5035017.5 +sparsity check 0.29999999489103046 +time 138.24 +58 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 804072.6875 +err_fin 753086.125 +sparsity check 0.29999999489103046 +time 135.76 +59 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 261189.328125 +err_fin 195252.34375 +sparsity check 0.29998879134655 +time 74.96 +59 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 78628.265625 +err_fin 63917.546875 +sparsity check 0.29999983310699463 +time 1.32 +59 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 198882.34375 +err_fin 175707.046875 +sparsity check 0.29999983310699463 +time 1.31 +59 self_attn.o_proj +Pruning ... +0.29765182733535767 0.09765180945396423 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1834356305698859 0.9709505944546686 1.0 +err_prefin 53340.76953125 +err_fin 29668.001953125 +sparsity check 0.29765182733535767 +time 67.83 +59 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8060360.0 +err_fin 5930122.5 +sparsity check 0.29999999489103046 +time 137.81 +59 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7034945.5 +err_fin 5161545.0 +sparsity check 0.29999999489103046 +time 138.11 +59 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 809505.5625 +err_fin 759982.375 +sparsity check 0.29999999489103046 +time 135.83 +60 self_attn.q_proj +Pruning ... +0.29995179176330566 0.09995177388191223 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1907708324184256 0.9709505944546686 1.0 +err_prefin 43336.16015625 +err_fin 32273.69140625 +sparsity check 0.29995179176330566 +time 74.93 +60 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 16123.9609375 +err_fin 13001.099609375 +sparsity check 0.29999983310699463 +time 1.33 +60 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 104462.84375 +err_fin 92991.0859375 +sparsity check 0.29999983310699463 +time 1.31 +60 self_attn.o_proj +Pruning ... +0.2995652109384537 0.09956519305706024 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1895439015964504 0.9709505944546686 1.0 +err_prefin 29143.748046875 +err_fin 17555.71484375 +sparsity check 0.2995652109384537 +time 67.82 +60 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8142654.0 +err_fin 6016490.5 +sparsity check 0.29999999489103046 +time 137.86 +60 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7137665.0 +err_fin 5262851.5 +sparsity check 0.29999999489103046 +time 138.31 +60 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 804617.5625 +err_fin 756961.75 +sparsity check 0.29999999489103046 +time 135.77 +61 self_attn.q_proj +Pruning ... +0.2999650239944458 0.09996500611305237 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908127864263665 0.9709505944546686 1.0 +err_prefin 167471.125 +err_fin 126957.0859375 +sparsity check 0.2999650239944458 +time 74.94 +61 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 54862.91796875 +err_fin 44433.5234375 +sparsity check 0.29999983310699463 +time 1.33 +61 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 176642.21875 +err_fin 159432.40625 +sparsity check 0.29999983310699463 +time 1.33 +61 self_attn.o_proj +Pruning ... +0.296064168214798 0.09606415033340454 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.178321739235102 0.9709505944546686 1.0 +err_prefin 39242.359375 +err_fin 23668.04296875 +sparsity check 0.296064168214798 +time 67.81 +61 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8267901.0 +err_fin 6111480.0 +sparsity check 0.29999999489103046 +time 137.77 +61 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 7285141.0 +err_fin 5375617.0 +sparsity check 0.29999276995658875 +time 138.01 +61 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 813747.75 +err_fin 766580.6875 +sparsity check 0.29999999489103046 +time 135.78 +62 self_attn.q_proj +Pruning ... +0.2999762147665024 0.09997619688510895 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908482656089139 0.9709505944546686 1.0 +err_prefin 183847.09375 +err_fin 138195.8125 +sparsity check 0.2999762147665024 +time 74.91 +62 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 60996.53125 +err_fin 49719.8203125 +sparsity check 0.29999983310699463 +time 1.35 +62 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 195448.65625 +err_fin 173201.8125 +sparsity check 0.29999983310699463 +time 1.31 +62 self_attn.o_proj +Pruning ... +0.2990509569644928 0.09905093908309937 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.187908032693221 0.9709505944546686 1.0 +err_prefin 39808.921875 +err_fin 21340.3515625 +sparsity check 0.2990509569644928 +time 67.81 +62 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8432903.0 +err_fin 6258731.0 +sparsity check 0.29999999489103046 +time 137.74 +62 mlp.up_proj +Pruning ... +0.29999276995658875 0.1999746859073639 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060101194662678 0.9709505944546686 1.0 +err_prefin 7473114.0 +err_fin 5530597.0 +sparsity check 0.29999276995658875 +time 138.19 +62 mlp.down_proj +Pruning ... +0.2999999906335558 0.19999997317790985 0.242857141154153 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245631764015 0.9709505944546686 1.0 +err_prefin 826638.8125 +err_fin 779265.5 +sparsity check 0.2999999906335558 +time 135.78 +63 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 160645.734375 +err_fin 121673.390625 +sparsity check 0.29998879134655 +time 74.94 +63 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 48778.3046875 +err_fin 39141.375 +sparsity check 0.29999983310699463 +time 1.33 +63 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 157449.4375 +err_fin 137935.078125 +sparsity check 0.29999983310699463 +time 1.31 +63 self_attn.o_proj +Pruning ... +0.29345013201236725 0.09345011413097382 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1698103230969439 0.9709505944546686 1.0 +err_prefin 34115.03515625 +err_fin 20066.1640625 +sparsity check 0.29345013201236725 +time 67.90 +63 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8570918.0 +err_fin 6379026.5 +sparsity check 0.29999999489103046 +time 137.81 +63 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7615606.5 +err_fin 5653985.5 +sparsity check 0.29999999489103046 +time 138.07 +63 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 836566.25 +err_fin 789226.4375 +sparsity check 0.29999999489103046 +time 135.88 +64 self_attn.q_proj +Pruning ... +0.2999767065048218 0.09997668862342834 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908498245682169 0.9709505944546686 1.0 +err_prefin 299505.1875 +err_fin 226632.09375 +sparsity check 0.2999767065048218 +time 74.97 +64 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 93323.4375 +err_fin 74509.125 +sparsity check 0.29999983310699463 +time 1.34 +64 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 216622.78125 +err_fin 194778.28125 +sparsity check 0.29999983310699463 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.2868971824645996 0.08689716458320618 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1479553973166503 0.9709505944546686 1.0 +err_prefin 50887.25 +err_fin 30267.08984375 +sparsity check 0.2868971824645996 +time 67.90 +64 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8715448.0 +err_fin 6496698.0 +sparsity check 0.29999999489103046 +time 137.89 +64 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7766734.0 +err_fin 5779098.5 +sparsity check 0.29999999489103046 +time 138.35 +64 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 851869.125 +err_fin 804173.625 +sparsity check 0.29999999489103046 +time 135.84 +65 self_attn.q_proj +Pruning ... +0.2999642491340637 0.09996423125267029 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908103297375292 0.9709505944546686 1.0 +err_prefin 83586.21875 +err_fin 63058.08984375 +sparsity check 0.2999642491340637 +time 74.97 +65 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 27326.509765625 +err_fin 21336.466796875 +sparsity check 0.29999983310699463 +time 1.35 +65 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 121402.46875 +err_fin 106059.3046875 +sparsity check 0.29999983310699463 +time 1.33 +65 self_attn.o_proj +Pruning ... +0.2976520359516144 0.09765201807022095 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.183436299803295 0.9709505944546686 1.0 +err_prefin 27571.044921875 +err_fin 15242.9150390625 +sparsity check 0.2976520359516144 +time 67.85 +65 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8837253.0 +err_fin 6610702.5 +sparsity check 0.29999999489103046 +time 137.81 +65 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 7916858.0 +err_fin 5912370.5 +sparsity check 0.29999999489103046 +time 138.16 +65 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 866969.0 +err_fin 818632.5 +sparsity check 0.29999999489103046 +time 135.84 +66 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 149749.015625 +err_fin 112705.265625 +sparsity check 0.29998789727687836 +time 74.94 +66 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 47971.12109375 +err_fin 37845.2578125 +sparsity check 0.29999983310699463 +time 1.35 +66 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 158214.859375 +err_fin 141883.671875 +sparsity check 0.29999983310699463 +time 1.31 +66 self_attn.o_proj +Pruning ... +0.2893896996974945 0.08938968181610107 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1563574977218163 0.9709505944546686 1.0 +err_prefin 41824.7890625 +err_fin 27087.11328125 +sparsity check 0.2893896996974945 +time 67.91 +66 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9038488.0 +err_fin 6757903.0 +sparsity check 0.29999999489103046 +time 137.76 +66 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8152020.0 +err_fin 6083348.5 +sparsity check 0.29999999489103046 +time 138.23 +66 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 911303.9375 +err_fin 860313.625 +sparsity check 0.29999999489103046 +time 135.85 +67 self_attn.q_proj +Pruning ... +0.29998789727687836 0.09998787939548492 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908853016546297 0.9709505944546686 1.0 +err_prefin 63394.7734375 +err_fin 47541.875 +sparsity check 0.29998789727687836 +time 75.00 +67 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 22216.2890625 +err_fin 16543.91015625 +sparsity check 0.29999983310699463 +time 1.33 +67 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 68837.0859375 +err_fin 57603.5 +sparsity check 0.29999983310699463 +time 1.31 +67 self_attn.o_proj +Pruning ... +0.29094041883945465 0.09094040095806122 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.161529118747347 0.9709505944546686 1.0 +err_prefin 21132.658203125 +err_fin 9523.146484375 +sparsity check 0.29094041883945465 +time 67.92 +67 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9077870.0 +err_fin 6794662.0 +sparsity check 0.29999999489103046 +time 137.93 +67 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8247882.5 +err_fin 6164708.0 +sparsity check 0.29999999489103046 +time 138.18 +67 mlp.down_proj +Pruning ... +0.2999999906335558 0.19999997317790985 0.242857141154153 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245631764015 0.9709505944546686 1.0 +err_prefin 909022.625 +err_fin 857260.1875 +sparsity check 0.2999999906335558 +time 135.98 +68 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 542966.5 +err_fin 412181.25 +sparsity check 0.29998879134655 +time 74.95 +68 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 158593.75 +err_fin 129041.609375 +sparsity check 0.29999983310699463 +time 1.32 +68 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 309721.03125 +err_fin 279954.875 +sparsity check 0.29999983310699463 +time 1.30 +68 self_attn.o_proj +Pruning ... +0.28747063875198364 0.08747062087059021 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1498983780675118 0.9709505944546686 1.0 +err_prefin 41402.9375 +err_fin 26306.3984375 +sparsity check 0.28747063875198364 +time 67.91 +68 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9395424.0 +err_fin 7047702.5 +sparsity check 0.29999999489103046 +time 137.77 +68 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8598284.0 +err_fin 6436147.0 +sparsity check 0.29999999489103046 +time 138.35 +68 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 978026.25 +err_fin 920440.875 +sparsity check 0.29999999489103046 +time 136.08 +69 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 900533.125 +err_fin 684633.9375 +sparsity check 0.29998879134655 +time 75.00 +69 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 286843.1875 +err_fin 234974.984375 +sparsity check 0.29999983310699463 +time 1.31 +69 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 417839.0625 +err_fin 373894.9375 +sparsity check 0.29999983310699463 +time 1.31 +69 self_attn.o_proj +Pruning ... +0.28137916326522827 0.08137914538383484 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1289482802695847 0.9709505944546686 1.0 +err_prefin 89954.125 +err_fin 52598.1875 +sparsity check 0.28137916326522827 +time 67.98 +69 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9581910.0 +err_fin 7180432.0 +sparsity check 0.29999999489103046 +time 137.90 +69 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8861478.0 +err_fin 6631839.0 +sparsity check 0.29999999489103046 +time 138.13 +69 mlp.down_proj +Pruning ... +0.2999999906335558 0.19999997317790985 0.242857141154153 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245631764015 0.9709505944546686 1.0 +err_prefin 1031425.4375 +err_fin 971489.1875 +sparsity check 0.2999999906335558 +time 135.85 +70 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 532644.75 +err_fin 401152.875 +sparsity check 0.29998879134655 +time 74.96 +70 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 161935.875 +err_fin 128223.234375 +sparsity check 0.29999983310699463 +time 1.34 +70 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 246997.28125 +err_fin 218991.875 +sparsity check 0.29999983310699463 +time 1.32 +70 self_attn.o_proj +Pruning ... +0.2764320373535156 0.07643201947212219 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.111408817228582 0.9709505944546686 1.0 +err_prefin 74907.53125 +err_fin 39078.69140625 +sparsity check 0.2764320373535156 +time 67.86 +70 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9785912.0 +err_fin 7322683.0 +sparsity check 0.29999999489103046 +time 137.79 +70 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9113500.0 +err_fin 6808667.5 +sparsity check 0.29999999489103046 +time 138.21 +70 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1106128.5 +err_fin 1040462.5625 +sparsity check 0.29999999489103046 +time 135.72 +71 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 688739.875 +err_fin 523288.21875 +sparsity check 0.29998879134655 +time 74.94 +71 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 215805.96875 +err_fin 174147.234375 +sparsity check 0.29999983310699463 +time 1.33 +71 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 259656.25 +err_fin 231612.75 +sparsity check 0.29999983310699463 +time 1.31 +71 self_attn.o_proj +Pruning ... +0.28335021436214447 0.08335019648075104 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.13580331517402 0.9709505944546686 1.0 +err_prefin 110948.1796875 +err_fin 54004.78125 +sparsity check 0.28335021436214447 +time 67.91 +71 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10049788.0 +err_fin 7501033.5 +sparsity check 0.29999999489103046 +time 137.82 +71 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9385523.0 +err_fin 6993777.0 +sparsity check 0.29999999489103046 +time 138.07 +71 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1151471.5 +err_fin 1078902.25 +sparsity check 0.29999999489103046 +time 135.81 +72 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 951426.125 +err_fin 719770.125 +sparsity check 0.29998879134655 +time 74.94 +72 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 272272.0625 +err_fin 225153.5 +sparsity check 0.29999983310699463 +time 1.32 +72 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 299265.09375 +err_fin 267822.75 +sparsity check 0.29999983310699463 +time 1.31 +72 self_attn.o_proj +Pruning ... +0.28141236305236816 0.08141234517097473 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1290643606091357 0.9709505944546686 1.0 +err_prefin 133972.34375 +err_fin 76113.6328125 +sparsity check 0.28141236305236816 +time 67.90 +72 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10290312.0 +err_fin 7646359.5 +sparsity check 0.29999999489103046 +time 137.79 +72 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9684390.0 +err_fin 7182011.5 +sparsity check 0.29999999489103046 +time 138.19 +72 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1238548.25 +err_fin 1157237.5 +sparsity check 0.29999999489103046 +time 135.74 +73 self_attn.q_proj +Pruning ... +0.29998879134655 0.09998877346515656 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908881359557477 0.9709505944546686 1.0 +err_prefin 893224.3125 +err_fin 670156.5 +sparsity check 0.29998879134655 +time 74.94 +73 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 259352.890625 +err_fin 212043.390625 +sparsity check 0.29999983310699463 +time 1.33 +73 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 347109.5625 +err_fin 303783.3125 +sparsity check 0.29999983310699463 +time 1.32 +73 self_attn.o_proj +Pruning ... +0.2856079190969467 0.08560790121555328 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1435652362660824 0.9709505944546686 1.0 +err_prefin 83023.703125 +err_fin 46943.09375 +sparsity check 0.2856079190969467 +time 67.88 +73 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10542981.0 +err_fin 7793762.0 +sparsity check 0.29999999489103046 +time 137.81 +73 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9977112.0 +err_fin 7360894.0 +sparsity check 0.29999999489103046 +time 138.11 +73 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1356530.0 +err_fin 1256825.5 +sparsity check 0.29999999489103046 +time 135.71 +74 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 836136.125 +err_fin 612812.75 +sparsity check 0.29999998211860657 +time 74.95 +74 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 243933.1875 +err_fin 190287.96875 +sparsity check 0.29999983310699463 +time 1.31 +74 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 266785.625 +err_fin 229428.234375 +sparsity check 0.29999983310699463 +time 1.31 +74 self_attn.o_proj +Pruning ... +0.2842986732721329 0.08429865539073944 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.139075677794163 0.9709505944546686 1.0 +err_prefin 199612.921875 +err_fin 92505.375 +sparsity check 0.2842986732721329 +time 67.88 +74 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10793855.0 +err_fin 7861494.0 +sparsity check 0.29999999489103046 +time 137.74 +74 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10255294.0 +err_fin 7452481.0 +sparsity check 0.29999999489103046 +time 138.20 +74 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1510742.375 +err_fin 1389251.125 +sparsity check 0.29999999489103046 +time 135.70 +75 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 837856.75 +err_fin 608661.25 +sparsity check 0.29999998211860657 +time 74.92 +75 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 236869.59375 +err_fin 178914.90625 +sparsity check 0.29999983310699463 +time 1.34 +75 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 324162.875 +err_fin 271104.4375 +sparsity check 0.29999983310699463 +time 1.31 +75 self_attn.o_proj +Pruning ... +0.28303997218608856 0.08303995430469513 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.134729242618206 0.9709505944546686 1.0 +err_prefin 205746.5625 +err_fin 94100.859375 +sparsity check 0.28303997218608856 +time 67.90 +75 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10955038.0 +err_fin 7880301.0 +sparsity check 0.29999999489103046 +time 137.81 +75 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10408592.0 +err_fin 7467028.5 +sparsity check 0.29999999489103046 +time 138.02 +75 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 1738963.5 +err_fin 1577450.75 +sparsity check 0.29999999489103046 +time 135.80 +76 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 1289686.0 +err_fin 908438.4375 +sparsity check 0.29999998211860657 +time 74.94 +76 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 349882.875 +err_fin 261249.71875 +sparsity check 0.29999983310699463 +time 1.33 +76 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 487619.5625 +err_fin 406323.0 +sparsity check 0.29999983310699463 +time 1.31 +76 self_attn.o_proj +Pruning ... +0.280566543340683 0.08056652545928955 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1261003695482468 0.9709505944546686 1.0 +err_prefin 628972.0 +err_fin 322890.15625 +sparsity check 0.280566543340683 +time 67.90 +76 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10994782.0 +err_fin 7706991.5 +sparsity check 0.29999999489103046 +time 137.78 +76 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10381006.0 +err_fin 7253138.0 +sparsity check 0.29999999489103046 +time 138.17 +76 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2176063.5 +err_fin 1922020.0 +sparsity check 0.29999999489103046 +time 135.67 +77 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 885047.625 +err_fin 596472.5 +sparsity check 0.29999998211860657 +time 74.95 +77 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 256333.953125 +err_fin 182484.203125 +sparsity check 0.29999983310699463 +time 1.34 +77 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 336631.5 +err_fin 273807.1875 +sparsity check 0.29999983310699463 +time 1.31 +77 self_attn.o_proj +Pruning ... +0.28083010017871857 0.08083008229732513 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1270254352798543 0.9709505944546686 1.0 +err_prefin 314081.3125 +err_fin 125044.8125 +sparsity check 0.28083010017871857 +time 67.85 +77 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 10395529.0 +err_fin 7067618.0 +sparsity check 0.29999999489103046 +time 137.76 +77 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 9835056.0 +err_fin 6655396.0 +sparsity check 0.29999999489103046 +time 138.06 +77 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2710644.5 +err_fin 2303077.5 +sparsity check 0.29999999489103046 +time 135.82 +78 self_attn.q_proj +Pruning ... +0.29999998211860657 0.09999996423721313 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1909236108740666 0.9709505944546686 1.0 +err_prefin 872924.625 +err_fin 548241.25 +sparsity check 0.29999998211860657 +time 74.93 +78 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 267820.6875 +err_fin 179371.28125 +sparsity check 0.29999983310699463 +time 1.34 +78 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 289426.4375 +err_fin 240119.75 +sparsity check 0.29999983310699463 +time 1.31 +78 self_attn.o_proj +Pruning ... +0.2920469343662262 0.09204691648483276 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1651936344078773 0.9709505944546686 1.0 +err_prefin 246184.46875 +err_fin 97441.7578125 +sparsity check 0.2920469343662262 +time 67.89 +78 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8960266.0 +err_fin 5861117.5 +sparsity check 0.29999999489103046 +time 137.81 +78 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 8408182.0 +err_fin 5463602.0 +sparsity check 0.29999999489103046 +time 138.23 +78 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 3032714.0 +err_fin 2369612.25 +sparsity check 0.29999999489103046 +time 135.74 +79 self_attn.q_proj +Pruning ... +0.2999882996082306 0.09998828172683716 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1908865770917187 0.9709505944546686 1.0 +err_prefin 587816.625 +err_fin 339601.15625 +sparsity check 0.2999882996082306 +time 74.95 +79 self_attn.k_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 199996.71875 +err_fin 134468.171875 +sparsity check 0.29999983310699463 +time 1.33 +79 self_attn.v_proj +Pruning ... +0.29999983310699463 0.19999980926513672 0.27499985694885254 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9387889424076327 0.9709505944546686 1.0 +err_prefin 137283.46875 +err_fin 104226.09375 +sparsity check 0.29999983310699463 +time 1.31 +79 self_attn.o_proj +Pruning ... +0.27929268777370453 0.0792926698923111 0.20000001788139343 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.1216100825857187 0.9709505944546686 1.0 +err_prefin 71334.6875 +err_fin 20835.970703125 +sparsity check 0.27929268777370453 +time 67.87 +79 mlp.gate_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5652304.5 +err_fin 3413151.25 +sparsity check 0.29999999489103046 +time 137.76 +79 mlp.up_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 5221667.0 +err_fin 3130273.0 +sparsity check 0.29999999489103046 +time 138.04 +79 mlp.down_proj +Pruning ... +0.29999999489103046 0.19999997317790985 0.24285714541162765 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0060245701606083 0.9709505944546686 1.0 +err_prefin 2738567.0 +err_fin 1698428.75 +sparsity check 0.29999999489103046 +time 135.81 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(4.5747e-06) +model.layers.0.self_attn.k_proj.weight tensor(0.0571) +model.layers.0.self_attn.v_proj.weight tensor(0.0995) +model.layers.0.self_attn.o_proj.weight tensor(4.8578e-06) +model.layers.0.mlp.gate_proj.weight tensor(2.8568e-06) +model.layers.0.mlp.up_proj.weight tensor(2.7631e-06) +model.layers.0.mlp.down_proj.weight tensor(0.0548) +50991.72986912727 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 5.992858 diff --git a/logs/llama2-70-0.7-no-final b/logs/llama2-70-0.7-no-final new file mode 100644 index 0000000..25c9081 --- /dev/null +++ b/logs/llama2-70-0.7-no-final @@ -0,0 +1,2897 @@ +Running on dev: cuda:0 +loading llama +llama loaded +Starting... on device cuda:0 +Ready. +0 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 31.5526065826416 +time 74.35 +0 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52.44609832763672 +time 1.29 +0 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 11.234882354736328 +time 1.31 +0 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2.254166603088379 +time 67.06 +0 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 998.2425537109375 +time 132.89 +0 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1007.9627075195312 +time 133.17 +0 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 19.838790893554688 +time 132.32 +1 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 554.335693359375 +time 74.15 +1 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 661.0631103515625 +time 1.30 +1 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 79.96988677978516 +time 1.29 +1 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66.66436767578125 +time 67.07 +1 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7378.185546875 +time 132.96 +1 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 8184.15087890625 +time 133.28 +1 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 406.24114990234375 +time 132.31 +2 self_attn.q_proj +Pruning ... +0.2999999523162842 0.15999996662139893 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482489674002 0.9709505944546686 1.0 +err_prefin 2442.3017578125 +time 74.23 +2 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 2571.81005859375 +time 1.30 +2 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 344.1282043457031 +time 1.31 +2 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 310.08148193359375 +time 67.14 +2 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 31604.423828125 +time 132.88 +2 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 33511.109375 +time 133.27 +2 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1480.284423828125 +time 132.44 +3 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 23244.59765625 +time 74.19 +3 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 14483.0224609375 +time 1.31 +3 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 4647.0556640625 +time 1.31 +3 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 546.3053588867188 +time 67.16 +3 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 76256.078125 +time 132.87 +3 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 77684.734375 +time 133.26 +3 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2164.01904296875 +time 132.32 +4 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 48465.7265625 +time 74.34 +4 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 21516.95703125 +time 1.31 +4 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 7692.712890625 +time 1.30 +4 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 732.003662109375 +time 67.19 +4 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 131821.96875 +time 132.93 +4 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 130936.140625 +time 133.32 +4 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3334.408203125 +time 132.42 +5 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 73133.65625 +time 74.19 +5 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 35456.4140625 +time 1.29 +5 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 11244.857421875 +time 1.30 +5 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 1221.337158203125 +time 67.11 +5 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 169625.625 +time 132.92 +5 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 167639.46875 +time 133.29 +5 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4784.5341796875 +time 132.42 +6 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 84363.0546875 +time 74.20 +6 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 39407.2265625 +time 1.32 +6 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 13365.173828125 +time 1.31 +6 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2315.88818359375 +time 67.17 +6 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 231124.15625 +time 133.00 +6 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 226603.40625 +time 133.35 +6 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7224.4072265625 +time 132.42 +7 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 138685.34375 +time 74.33 +7 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 60184.25390625 +time 1.31 +7 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 18989.333984375 +time 1.30 +7 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2784.62060546875 +time 67.25 +7 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 288652.6875 +time 132.94 +7 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 282198.1875 +time 133.24 +7 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 10346.4921875 +time 132.44 +8 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 119998.609375 +time 74.29 +8 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52935.63671875 +time 1.31 +8 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 18440.67578125 +time 1.31 +8 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 4614.5458984375 +time 67.12 +8 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 403003.34375 +time 133.03 +8 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 386727.4375 +time 133.35 +8 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 17268.1328125 +time 132.39 +9 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 132674.0625 +time 74.35 +9 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57647.3515625 +time 1.32 +9 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 27806.244140625 +time 1.31 +9 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2570.13818359375 +time 67.29 +9 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 520979.5 +time 133.17 +9 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 496662.4375 +time 133.29 +9 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 18120.21484375 +time 132.37 +10 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 75256.96875 +time 74.34 +10 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 30597.345703125 +time 1.31 +10 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 27144.484375 +time 1.31 +10 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 1852.525146484375 +time 67.26 +10 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 641454.375 +time 133.22 +10 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 608783.8125 +time 133.13 +10 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 19486.072265625 +time 132.70 +11 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 126189.28125 +time 74.34 +11 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 60340.3515625 +time 1.30 +11 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 31537.26171875 +time 1.31 +11 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 2960.74169921875 +time 67.27 +11 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 691041.5 +time 133.25 +11 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 658883.5625 +time 133.63 +11 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 21771.5234375 +time 132.76 +12 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 108341.5234375 +time 74.18 +12 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52667.33984375 +time 1.31 +12 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 23476.75390625 +time 1.29 +12 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 3583.69482421875 +time 67.13 +12 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 688974.125 +time 132.90 +12 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 662338.4375 +time 133.62 +12 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 24323.609375 +time 132.64 +13 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 137623.046875 +time 74.21 +13 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 58294.0859375 +time 1.32 +13 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 32058.76953125 +time 1.30 +13 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 8978.650390625 +time 67.10 +13 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 729425.375 +time 132.88 +13 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 707805.375 +time 133.27 +13 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 31950.888671875 +time 132.36 +14 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 236391.03125 +time 74.15 +14 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 97577.7109375 +time 1.31 +14 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 44853.12109375 +time 1.30 +14 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11212.888671875 +time 67.11 +14 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 849021.25 +time 132.88 +14 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 823579.5 +time 133.33 +14 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 33598.4609375 +time 132.34 +15 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 167237.703125 +time 74.19 +15 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 67977.171875 +time 1.30 +15 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 55613.015625 +time 1.30 +15 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11331.427734375 +time 67.10 +15 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 908655.125 +time 132.90 +15 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 886018.125 +time 133.28 +15 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 39335.1484375 +time 132.48 +16 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 163885.6875 +time 74.22 +16 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 72068.109375 +time 1.31 +16 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 51465.4609375 +time 1.31 +16 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 10284.2802734375 +time 67.15 +16 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 963040.8125 +time 132.96 +16 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 940930.75 +time 133.34 +16 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 39663.0390625 +time 132.44 +17 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 215545.953125 +time 74.21 +17 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 92451.4375 +time 1.31 +17 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 44456.1328125 +time 1.32 +17 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15384.744140625 +time 67.12 +17 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 941519.4375 +time 132.89 +17 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 923326.25 +time 133.25 +17 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 52658.8671875 +time 132.33 +18 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 197980.59375 +time 74.18 +18 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 82839.6640625 +time 1.34 +18 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 69647.796875 +time 1.29 +18 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15146.6953125 +time 67.08 +18 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1038781.875 +time 132.87 +18 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1012503.5 +time 133.23 +18 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 56106.6640625 +time 132.40 +19 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 239990.21875 +time 74.19 +19 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 96670.7578125 +time 1.31 +19 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 83000.84375 +time 1.32 +19 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14134.486328125 +time 67.13 +19 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1127269.5 +time 132.90 +19 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1098608.0 +time 133.28 +19 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 55697.8515625 +time 132.38 +20 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 209576.1875 +time 74.18 +20 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 84032.640625 +time 1.30 +20 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 67712.90625 +time 1.30 +20 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 9953.4306640625 +time 67.11 +20 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1217238.5 +time 132.91 +20 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1188032.5 +time 133.23 +20 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 55471.19140625 +time 132.31 +21 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 122628.21875 +time 74.19 +21 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 48572.55078125 +time 1.31 +21 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57993.25 +time 1.30 +21 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14676.3857421875 +time 67.11 +21 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1264665.5 +time 132.90 +21 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1240083.125 +time 133.30 +21 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 59337.2421875 +time 132.37 +22 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 118583.1796875 +time 74.17 +22 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 54648.171875 +time 1.31 +22 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 66441.0625 +time 1.30 +22 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 10421.3662109375 +time 67.10 +22 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1336592.25 +time 132.87 +22 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1313410.5 +time 133.35 +22 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 60978.33203125 +time 132.01 +23 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 180634.46875 +time 74.17 +23 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 85172.7890625 +time 1.31 +23 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 70221.453125 +time 1.31 +23 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 13395.240234375 +time 67.12 +23 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1378777.25 +time 132.87 +23 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1358027.25 +time 133.23 +23 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 62420.25390625 +time 132.34 +24 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 160180.546875 +time 74.18 +24 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 75499.0546875 +time 1.30 +24 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52861.1875 +time 1.31 +24 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 11313.181640625 +time 67.07 +24 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1349332.375 +time 132.84 +24 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1336751.0 +time 133.24 +24 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 63374.6484375 +time 132.39 +25 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 182253.78125 +time 74.23 +25 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 85136.703125 +time 1.30 +25 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 55436.890625 +time 1.29 +25 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 29487.58203125 +time 67.14 +25 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1266213.5 +time 132.94 +25 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1264720.25 +time 133.28 +25 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 92306.71875 +time 132.37 +26 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 278793.5625 +time 74.19 +26 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 123254.59375 +time 1.30 +26 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 83458.3828125 +time 1.31 +26 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 32763.388671875 +time 67.08 +26 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1450715.125 +time 132.99 +26 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1442068.625 +time 133.20 +26 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 89121.875 +time 132.34 +27 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 117377.765625 +time 74.23 +27 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57734.515625 +time 1.31 +27 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 70675.015625 +time 1.31 +27 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14198.720703125 +time 67.15 +27 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1539881.75 +time 132.91 +27 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1529547.0 +time 133.27 +27 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 93750.03125 +time 132.36 +28 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 222693.65625 +time 74.22 +28 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 107735.625 +time 1.31 +28 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 108025.09375 +time 1.31 +28 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 22527.2421875 +time 67.13 +28 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1629005.25 +time 132.93 +28 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1618156.125 +time 133.27 +28 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 98853.671875 +time 132.33 +29 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 217407.625 +time 74.21 +29 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 98504.375 +time 1.31 +29 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 117649.53125 +time 1.30 +29 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 27002.08984375 +time 67.17 +29 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1730684.5 +time 132.96 +29 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1715264.5 +time 133.30 +29 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 104639.296875 +time 132.36 +30 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 105451.1015625 +time 74.24 +30 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 41481.4375 +time 1.31 +30 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 107683.2578125 +time 1.31 +30 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 15626.08203125 +time 67.09 +30 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1836840.625 +time 132.94 +30 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1815497.375 +time 133.29 +30 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 107850.6328125 +time 132.29 +31 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 226024.875 +time 74.22 +31 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 100023.734375 +time 1.31 +31 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 116832.1328125 +time 1.31 +31 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30331.01171875 +time 67.11 +31 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1927874.25 +time 132.94 +31 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1899479.5 +time 133.17 +31 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 109930.75 +time 132.35 +32 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 268912.8125 +time 74.19 +32 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 115952.984375 +time 1.36 +32 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 103949.9375 +time 1.30 +32 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 18538.21875 +time 67.11 +32 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1977070.625 +time 132.95 +32 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1951940.0 +time 133.28 +32 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 115680.390625 +time 132.31 +33 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 389546.34375 +time 74.18 +33 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 156150.109375 +time 1.30 +33 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 115923.796875 +time 1.30 +33 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 38637.20703125 +time 67.10 +33 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1992290.125 +time 133.02 +33 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1972915.0 +time 133.38 +33 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 134200.359375 +time 132.50 +34 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 125157.125 +time 74.22 +34 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 54646.32421875 +time 1.32 +34 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 69443.5625 +time 1.31 +34 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 24770.6875 +time 67.10 +34 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2061498.25 +time 133.07 +34 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2041977.75 +time 133.40 +34 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 146259.5 +time 132.44 +35 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 218872.25 +time 74.16 +35 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 86917.953125 +time 1.30 +35 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 94099.5625 +time 1.31 +35 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 29882.33984375 +time 67.10 +35 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2189425.5 +time 132.93 +35 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2167214.0 +time 133.27 +35 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 155221.28125 +time 132.38 +36 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 209964.21875 +time 74.20 +36 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 86419.625 +time 1.31 +36 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 91525.859375 +time 1.31 +36 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 21488.21484375 +time 67.14 +36 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2239729.5 +time 132.94 +36 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2219749.0 +time 133.27 +36 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 173578.78125 +time 132.33 +37 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 348986.5625 +time 74.19 +37 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 140931.5625 +time 1.31 +37 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 116855.0625 +time 1.31 +37 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 35544.62109375 +time 67.10 +37 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2341148.0 +time 132.88 +37 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2321247.0 +time 133.24 +37 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 188810.5625 +time 132.32 +38 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 390541.6875 +time 74.20 +38 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 154272.296875 +time 1.31 +38 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 123844.84375 +time 1.31 +38 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 60033.3515625 +time 67.11 +38 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2411030.0 +time 132.92 +38 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2395395.0 +time 133.25 +38 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 213351.5625 +time 132.31 +39 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 482137.3125 +time 74.18 +39 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 200098.65625 +time 1.31 +39 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 134308.96875 +time 1.31 +39 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 65089.78125 +time 67.13 +39 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2431834.0 +time 132.90 +39 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2419592.25 +time 133.23 +39 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 261454.25 +time 132.33 +40 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 447430.53125 +time 74.21 +40 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 174069.1875 +time 1.31 +40 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 153654.140625 +time 1.31 +40 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 94084.90625 +time 67.14 +40 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2520647.75 +time 132.95 +40 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2505940.5 +time 133.27 +40 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 300964.78125 +time 132.32 +41 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 360327.9375 +time 74.23 +41 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137488.0 +time 1.32 +41 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 133586.4375 +time 1.31 +41 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 104387.6484375 +time 67.14 +41 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2610131.0 +time 132.93 +41 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2554464.75 +time 133.25 +41 mlp.down_proj +Pruning ... +0.2999999863760812 0.2499999850988388 0.2285714192049844 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063847936203 0.9709505944546686 1.0 +err_prefin 357945.25 +time 132.38 +42 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 417292.25 +time 74.17 +42 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 149188.53125 +time 1.31 +42 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 164675.15625 +time 1.29 +42 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 94280.3203125 +time 67.08 +42 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2812360.5 +time 132.88 +42 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2692355.75 +time 133.21 +42 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 380780.8125 +time 132.32 +43 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 283651.125 +time 74.18 +43 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 122436.4375 +time 1.31 +43 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 129223.125 +time 1.31 +43 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 76335.546875 +time 67.07 +43 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2922340.0 +time 132.84 +43 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2770022.0 +time 133.23 +43 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 419566.5625 +time 132.38 +44 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 514903.5 +time 74.17 +44 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152951.6875 +time 1.30 +44 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 195854.1875 +time 1.30 +44 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 180027.28125 +time 67.08 +44 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3191728.25 +time 132.89 +44 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 2930800.5 +time 133.20 +44 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 490765.28125 +time 132.35 +45 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 321122.6875 +time 74.19 +45 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 128542.78125 +time 1.31 +45 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 190542.5625 +time 1.31 +45 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 56156.19140625 +time 67.12 +45 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3449118.0 +time 132.91 +45 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3128793.5 +time 133.24 +45 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 493217.34375 +time 132.38 +46 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 265631.4375 +time 74.17 +46 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 101115.84375 +time 1.31 +46 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 164556.421875 +time 1.30 +46 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 78668.4375 +time 67.11 +46 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3702983.0 +time 132.93 +46 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3307951.0 +time 133.22 +46 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 491784.375 +time 132.36 +47 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 367896.5 +time 74.18 +47 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 147918.25 +time 1.31 +47 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 162615.296875 +time 1.31 +47 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 107631.9375 +time 67.11 +47 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3811822.25 +time 132.93 +47 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3361974.0 +time 133.23 +47 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 555750.125 +time 132.44 +48 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 174250.34375 +time 74.16 +48 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 57852.25 +time 1.29 +48 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 154109.546875 +time 1.30 +48 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 71560.65625 +time 67.05 +48 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3988673.0 +time 132.88 +48 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3501328.0 +time 133.21 +48 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 549560.8125 +time 132.34 +49 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 93208.640625 +time 74.20 +49 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 40360.7890625 +time 1.31 +49 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 125977.703125 +time 1.31 +49 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 25567.244140625 +time 67.11 +49 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4134557.5 +time 132.97 +49 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3626741.0 +time 133.31 +49 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 546702.75 +time 132.34 +50 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 155188.4375 +time 74.16 +50 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 52855.34375 +time 1.31 +50 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 160687.25 +time 1.31 +50 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 60433.125 +time 67.07 +50 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4292964.0 +time 132.89 +50 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3749839.0 +time 133.24 +50 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 548676.375 +time 132.35 +51 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 204507.65625 +time 74.21 +51 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 76368.125 +time 1.31 +51 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 166325.875 +time 1.30 +51 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 54188.4140625 +time 67.18 +51 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4411626.0 +time 132.97 +51 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3864618.75 +time 133.27 +51 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 548309.5 +time 132.36 +52 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 345538.46875 +time 74.17 +52 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 97990.2734375 +time 1.30 +52 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 190622.46875 +time 1.31 +52 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 97987.203125 +time 67.10 +52 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4509819.5 +time 132.92 +52 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3939153.0 +time 133.25 +52 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 557036.75 +time 132.33 +53 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 98797.375 +time 74.23 +53 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 31212.240234375 +time 1.31 +53 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 158531.421875 +time 1.31 +53 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 39168.52734375 +time 67.17 +53 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4629908.5 +time 132.96 +53 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4053844.5 +time 133.28 +53 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 563715.5625 +time 132.33 +54 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 116401.71875 +time 74.17 +54 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 36323.578125 +time 1.31 +54 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 163942.734375 +time 1.29 +54 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 49682.015625 +time 67.09 +54 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4739122.0 +time 132.88 +54 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4161110.75 +time 133.22 +54 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 561708.5625 +time 132.35 +55 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 193069.96875 +time 74.20 +55 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 68852.84375 +time 1.31 +55 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 215138.125 +time 1.31 +55 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 93487.796875 +time 67.11 +55 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4823177.0 +time 132.93 +55 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4263465.0 +time 133.26 +55 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 574455.0 +time 132.33 +56 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 145411.03125 +time 74.18 +56 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 54584.6484375 +time 1.29 +56 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 151472.375 +time 1.31 +56 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 45811.046875 +time 67.06 +56 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4864284.0 +time 132.86 +56 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4313716.0 +time 133.22 +56 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 577074.625 +time 132.33 +57 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 126728.4453125 +time 74.17 +57 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 42517.40625 +time 1.30 +57 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 192418.0625 +time 1.29 +57 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 51367.7734375 +time 67.06 +57 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5038693.5 +time 132.87 +57 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4473327.5 +time 133.23 +57 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 587364.125 +time 132.36 +58 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 66652.5625 +time 74.17 +58 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 21538.62109375 +time 1.30 +58 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137860.5625 +time 1.30 +58 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 27888.09765625 +time 67.08 +58 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5136027.0 +time 132.87 +58 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4568492.0 +time 133.21 +58 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 588803.625 +time 132.34 +59 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 88454.71875 +time 74.19 +59 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 26870.552734375 +time 1.30 +59 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 152944.90625 +time 1.30 +59 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 27917.62109375 +time 67.11 +59 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5246179.0 +time 132.91 +59 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4677478.0 +time 133.16 +59 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 587390.625 +time 132.32 +60 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 12727.052734375 +time 74.19 +60 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 3512.98291015625 +time 1.30 +60 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 82602.203125 +time 1.30 +60 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 14933.5986328125 +time 67.15 +60 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5304410.5 +time 132.89 +60 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4751317.5 +time 133.22 +60 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 578973.5 +time 132.35 +61 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 54048.3671875 +time 74.19 +61 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 16042.8564453125 +time 1.31 +61 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 139960.9375 +time 1.31 +61 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 24920.244140625 +time 67.13 +61 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5398592.0 +time 132.95 +61 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4856495.0 +time 133.32 +61 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 580411.625 +time 132.49 +62 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 57423.78515625 +time 74.17 +62 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 19240.78515625 +time 1.30 +62 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 151660.359375 +time 1.30 +62 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 19601.64453125 +time 67.10 +62 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5514424.0 +time 132.88 +62 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 4977589.5 +time 133.25 +62 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 585774.5 +time 132.41 +63 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 53042.68359375 +time 74.20 +63 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 14410.1376953125 +time 1.32 +63 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 118978.71875 +time 1.31 +63 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 19115.703125 +time 67.15 +63 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5625384.5 +time 133.00 +63 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5088692.0 +time 133.37 +63 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 590734.375 +time 132.40 +64 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 102080.15625 +time 74.19 +64 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 33424.9921875 +time 1.31 +64 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 170511.25 +time 1.31 +64 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 33364.4765625 +time 67.20 +64 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5727166.0 +time 132.94 +64 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5197972.0 +time 133.32 +64 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 598185.0625 +time 132.34 +65 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 26113.30859375 +time 74.18 +65 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 6471.1845703125 +time 1.32 +65 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 94210.6796875 +time 1.31 +65 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 13062.37109375 +time 67.11 +65 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5807544.0 +time 132.92 +65 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5295453.5 +time 133.40 +65 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 605141.4375 +time 132.31 +66 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 43937.9921875 +time 74.18 +66 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 12795.828125 +time 1.30 +66 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 124740.25 +time 1.31 +66 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 30385.8828125 +time 67.13 +66 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5949620.5 +time 132.90 +66 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5456178.0 +time 133.30 +66 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 634897.25 +time 132.53 +67 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 21512.400390625 +time 74.19 +67 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 5507.5830078125 +time 1.30 +67 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 51246.125 +time 1.31 +67 self_attn.o_proj +Pruning ... +0.2999999523162842 0.15999996662139893 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482489674002 0.9709505944546686 1.0 +err_prefin 9366.80078125 +time 67.09 +67 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5975366.0 +time 132.92 +67 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5516490.0 +time 133.17 +67 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 629174.375 +time 132.43 +68 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 204316.28125 +time 74.17 +68 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 74044.640625 +time 1.30 +68 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 242222.0 +time 1.30 +68 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 32686.236328125 +time 67.12 +68 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6216001.5 +time 132.90 +68 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5770339.0 +time 133.28 +68 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 681102.875 +time 132.36 +69 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 363704.0 +time 74.20 +69 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 162265.1875 +time 1.31 +69 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 323970.0625 +time 1.30 +69 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 54411.875 +time 67.11 +69 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6338971.5 +time 132.92 +69 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5945954.0 +time 133.28 +69 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 715812.875 +time 132.29 +70 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 198353.359375 +time 74.19 +70 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 75639.3203125 +time 1.31 +70 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 184515.90625 +time 1.30 +70 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 40008.4765625 +time 67.14 +70 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6486191.0 +time 132.90 +70 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6113921.0 +time 133.38 +70 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 766465.3125 +time 132.32 +71 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 282243.5625 +time 74.20 +71 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 123029.546875 +time 1.33 +71 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 198026.59375 +time 1.30 +71 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 52743.42578125 +time 67.14 +71 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6685019.0 +time 132.92 +71 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6310667.0 +time 133.39 +71 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 808048.375 +time 132.28 +72 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 404575.75 +time 74.17 +72 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 169575.328125 +time 1.30 +72 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 231400.0625 +time 1.31 +72 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 68200.359375 +time 67.10 +72 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6837654.0 +time 132.86 +72 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6498931.5 +time 133.22 +72 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 871159.5625 +time 132.30 +73 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 376025.4375 +time 74.18 +73 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 164598.84375 +time 1.35 +73 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 263015.8125 +time 1.31 +73 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 44399.3984375 +time 67.10 +73 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7012628.0 +time 132.86 +73 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6692871.0 +time 133.33 +73 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 966676.9375 +time 132.32 +74 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 339780.125 +time 74.20 +74 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 141210.046875 +time 1.31 +74 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 202251.15625 +time 1.31 +74 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 94332.234375 +time 67.12 +74 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7191236.5 +time 132.92 +74 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6890780.0 +time 133.32 +74 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1077826.5 +time 132.38 +75 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 348923.96875 +time 74.24 +75 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 137854.59375 +time 1.36 +75 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 232526.171875 +time 1.31 +75 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 93175.078125 +time 67.16 +75 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7260220.0 +time 132.95 +75 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6948571.0 +time 133.31 +75 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1219008.25 +time 132.43 +76 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 532576.3125 +time 74.21 +76 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 204126.5 +time 1.30 +76 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 374299.78125 +time 1.31 +76 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 283803.6875 +time 67.14 +76 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 7205197.5 +time 132.96 +76 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6847868.0 +time 133.29 +76 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1495172.0 +time 132.36 +77 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 353864.9375 +time 74.25 +77 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 145092.859375 +time 1.32 +77 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 250922.46875 +time 1.31 +77 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 156918.4375 +time 67.11 +77 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6709413.0 +time 132.94 +77 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 6380281.0 +time 133.21 +77 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1729246.5 +time 132.35 +78 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 316572.75 +time 74.20 +78 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 140766.8125 +time 1.31 +78 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 217920.3125 +time 1.31 +78 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 89117.1328125 +time 67.17 +78 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5597530.5 +time 132.94 +78 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 5267583.0 +time 133.30 +78 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1796634.125 +time 132.43 +79 self_attn.q_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 199077.828125 +time 74.16 +79 self_attn.k_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 109701.8359375 +time 1.31 +79 self_attn.v_proj +Pruning ... +0.2999997138977051 0.2499990463256836 0.26874983310699463 torch.Size([1024, 1024]) torch.Size([1024, 8192]) 0.9410745764702241 0.9709505944546686 1.0 +err_prefin 96453.1171875 +time 1.29 +79 self_attn.o_proj +Pruning ... +0.2999999672174454 0.15999998152256012 0.13999998569488525 torch.Size([8192, 8192]) torch.Size([8192, 8192]) 1.2185482846157119 0.9709505944546686 1.0 +err_prefin 26878.125 +time 67.10 +79 mlp.gate_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3397765.25 +time 132.89 +79 mlp.up_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 3107865.0 +time 133.36 +79 mlp.down_proj +Pruning ... +0.2999999906335558 0.2499999850988388 0.228571423462459 torch.Size([8192, 8192]) torch.Size([8192, 28672]) 1.0073063922650096 0.9709505944546686 1.0 +err_prefin 1388730.375 +time 132.38 +model.embed_tokens.weight tensor(2.5520e-06) +model.layers.0.self_attn.q_proj.weight tensor(0.0139) +model.layers.0.self_attn.k_proj.weight tensor(0.0296) +model.layers.0.self_attn.v_proj.weight tensor(0.0791) +model.layers.0.self_attn.o_proj.weight tensor(4.2617e-06) +model.layers.0.mlp.gate_proj.weight tensor(0.0001) +model.layers.0.mlp.up_proj.weight tensor(0.0001) +model.layers.0.mlp.down_proj.weight tensor(0.0185) +49798.816762685776 +Dataset: wikitext2 +Evaluating ... +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +Perplexity: 4.587645