 % Shared bibliography — all venues use the same references
-
-\bibliographystyle{plain}
-% \bibliography{references} % Uncomment when references.bib exists
-
-% Temporary inline references for skeleton
-\begin{thebibliography}{99}
-
-\bibitem{wang2023bitnet}
-H.~Wang et al.
-\newblock BitNet: Scaling 1-bit Transformers for Large Language Models.
-\newblock \emph{arXiv:2310.11453}, 2023.
-
-\bibitem{ma2024bitnetb158}
-S.~Ma et al.
-\newblock The Era of 1-bit LLMs: All Large Language Models are in 1.58 Bits.
-\newblock \emph{arXiv:2402.17764}, 2024.
-
-\bibitem{courbariaux2015binaryconnect}
-M.~Courbariaux et al.
-\newblock BinaryConnect: Training Deep Neural Networks with Binary Weights during Propagations.
-\newblock \emph{NeurIPS}, 2015.
-
-\bibitem{rastegari2016xnornet}
-M.~Rastegari et al.
-\newblock XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks.
-\newblock \emph{ECCV}, 2016.
-
-\bibitem{li2016ternary}
-F.~Li et al.
-\newblock Ternary Weight Networks.
-\newblock \emph{arXiv:1605.04711}, 2016.
-
-\bibitem{krizhevsky2009cifar}
-A.~Krizhevsky.
-\newblock Learning Multiple Layers of Features from Tiny Images.
-\newblock Technical report, 2009.
-
-\bibitem{devries2017cutout}
-T.~DeVries and G.~Taylor.
-\newblock Improved Regularization of Convolutional Neural Networks with Cutout.
-\newblock \emph{arXiv:1708.04552}, 2017.
-
-\bibitem{cubuk2019autoaugment}
-E.~Cubuk et al.
-\newblock AutoAugment: Learning Augmentation Strategies from Data.
-\newblock \emph{CVPR}, 2019.
-
-\bibitem{cubuk2020randaugment}
-E.~Cubuk et al.
-\newblock RandAugment: Practical Automated Data Augmentation with a Reduced Search Space.
-\newblock \emph{NeurIPS}, 2020.
-
-\bibitem{he2016resnet}
-K.~He et al.
-\newblock Deep Residual Learning for Image Recognition.
-\newblock \emph{CVPR}, 2016.
-
-\bibitem{hinton2015distilling}
-G.~Hinton, O.~Vinyals, and J.~Dean.
-\newblock Distilling the Knowledge in a Neural Network.
-\newblock \emph{NeurIPS Deep Learning Workshop}, 2015.
-
-\bibitem{zhu2017ttq}
-C.~Zhu, S.~Han, H.~Mao, and W.~Dally.
-\newblock Trained Ternary Quantization.
-\newblock \emph{ICLR}, 2017.
-
-\bibitem{kim2019qkd}
-J.~Kim, Y.~Bhalgat, J.~Lee, C.~Patel, and N.~Kwak.
-\newblock QKD: Quantization-aware Knowledge Distillation.
-\newblock \emph{arXiv:1911.12491}, 2019.
-
-\bibitem{wang2019haq}
-K.~Wang, Z.~Liu, Y.~Lin, J.~Lin, and S.~Han.
-\newblock HAQ: Hardware-Aware Automated Quantization with Mixed Precision.
-\newblock \emph{CVPR}, 2019.
-
-\bibitem{nielsen2024bitnetreloaded}
-J.~Nielsen and P.~Schneider-Kamp.
-\newblock BitNet b1.58 Reloaded: State-of-the-art Performance Also on Smaller Networks.
-\newblock \emph{arXiv:2407.09527}, 2024.
-
-\bibitem{kim2025bdnet}
-D.~Kim, J.-S.~Lee, N.-r.~Kim, S.~Lee, and J.-H.~Lee.
-\newblock BD-Net: Has Depth-Wise Convolution Ever Been Applied in Binary Neural Networks?
-\newblock \emph{AAAI}, 2026.
-
-\bibitem{zhou2016dorefa}
-S.~Zhou, Y.~Wu, Z.~Ni, X.~Zhou, H.~Wen, and Y.~Zou.
-\newblock DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients.
-\newblock \emph{arXiv:1606.06160}, 2016.
-
-\bibitem{dong2019hawq}
-Z.~Dong, Z.~Yao, A.~Gholami, M.~Mahoney, and K.~Keutzer.
-\newblock HAWQ: Hessian AWare Quantization of Neural Networks with Mixed-Precision.
-\newblock \emph{ICCV}, 2019.
-
-\bibitem{elthakeb2020dcq}
-A.~T.~Elthakeb, P.~Pilligundla, F.~Mireshghallah, A.~Alaghi, and H.~Esmaeilzadeh.
-\newblock Divide and Conquer: Leveraging Intermediate Feature Representations for Quantized Training of Neural Networks.
-\newblock \emph{arXiv:1906.06033}, 2020.
-
-\bibitem{le2015tinyimagenet}
-Y.~Le and X.~Yang.
-\newblock Tiny ImageNet Visual Recognition Challenge.
-\newblock CS 231N, Stanford University, 2015.
-
-\bibitem{liu2020reactnet}
-Z.~Liu, Z.~Shen, M.~Savvides, and K.-T.~Cheng.
-\newblock ReActNet: Towards Precise Binary Neural Network with Generalized Activation Functions.
-\newblock \emph{ECCV}, 2020.
-
-\bibitem{guo2022bnext}
-N.~Guo, J.~Bethge, C.~Meinel, and H.~Yang.
-\newblock Join the High Accuracy Club on ImageNet with A Binary Neural Network Ticket.
-\newblock \emph{arXiv:2211.12933}, 2022.
-
-\bibitem{dong2020hawqv2}
-Z.~Dong, Z.~Yao, D.~Arfeen, A.~Gholami, M.~W.~Mahoney, and K.~Keutzer.
-\newblock HAWQ-V2: Hessian Aware trace-Weighted Quantization of Neural Networks.
-\newblock \emph{NeurIPS}, 2020.
-
-\bibitem{qin2020irnet}
-H.~Qin, R.~Gong, X.~Liu, M.~Shen, Z.~Wei, F.~Yu, and J.~Song.
-\newblock Forward and Backward Information Retention for Accurate Binary Neural Networks.
-\newblock \emph{CVPR}, 2020.
-
-\bibitem{zhao2024sqakd}
-K.~Zhao and M.~Zhao.
-\newblock SQAKD: Self-supervised Quantization-Aware Knowledge Distillation.
-\newblock \emph{AISTATS}, 2024.
-
-\bibitem{bengio2013estimating}
-Y.~Bengio, N.~Léonard, and A.~Courville.
-\newblock Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation.
-\newblock \emph{arXiv:1308.3432}, 2013.
-
-\bibitem{kim2021relaxloss}
-J.~Kim, S.~Bhattacharjee, S.~Park, S.~Jung, and Y.~M.~Kim.
-\newblock RelaxLoss: Penalty-Free Quantization through Normalized Loss.
-\newblock \emph{arXiv:2105.00944}, 2021.
-
-\bibitem{zhang2018mixup}
-H.~Zhang, M.~Cisse, Y.~N.~Dauphin, and D.~Lopez-Paz.
-\newblock mixup: Beyond Empirical Risk Minimization.
-\newblock \emph{ICLR}, 2018.
-
-\bibitem{gundersen2018reproducibility}
-O.~E.~Gundersen and S.~Kjensmo.
-\newblock State of the Art: Reproducibility in Artificial Intelligence.
-\newblock \emph{AAAI}, 2018.
-
-\bibitem{hutson2018ai}
-M.~Hutson.
-\newblock Artificial Intelligence Faces Reproducibility Crisis.
-\newblock \emph{Science}, 359(6377):725--726, 2018.
-
-\bibitem{dodge2019mlchecklist}
-J.~Dodge, S.~Gururangan, D.~Card, R.~Schwartz, and N.~A.~Smith.
-\newblock Show Your Work: Improved Reporting of Experimental Results.
-\newblock \emph{EMNLP}, 2019.
-
-\bibitem{tflite}
-TensorFlow Lite Team.
-\newblock TensorFlow Lite: Deploy machine learning models on mobile and edge devices.
-\newblock \url{https://www.tensorflow.org/lite}, 2023.
-
-\bibitem{pytorch_mobile}
-PyTorch Team.
-\newblock PyTorch Mobile: End-to-end workflow from training to deployment.
-\newblock \url{https://pytorch.org/mobile/}, 2023.
-
-\bibitem{bitblas}
-L.~Wang, L.~Lei, L.~Ye, Y.~Zhao, W.~Chen, D.~Lin, X.~Zheng, and C.~Yu.
-\newblock BitBLAS: A High-Performance Library for Quantized Deep Learning.
-\newblock \emph{arXiv:2410.16144}, 2024.
-
-\bibitem{pytorch_cifar_kuangliu}
-K.~Liu.
-\newblock Train CIFAR10 with PyTorch.
-\newblock \url{https://github.com/kuangliu/pytorch-cifar}, 2017.
-
-\bibitem{pytorch_cifar100_weiaicunzai}
-W.~Zhang.
-\newblock PyTorch CIFAR-100 Benchmark.
-\newblock \url{https://github.com/weiaicunzai/pytorch-cifar100}, 2019.
-
-\bibitem{cover2006information}
-T.~M.~Cover and J.~A.~Thomas.
-\newblock \emph{Elements of Information Theory}, 2nd edition.
-\newblock Wiley-Interscience, 2006.
-
-\end{thebibliography}
+\bibliographystyle{tmlr}
+\bibliography{references}
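
For this change to compile, references.bib must supply an entry for every key the papers cite, and tmlr.bst must be on the TeX path (it is provided with the TMLR style files). The .bib file itself is not part of this diff; as a sketch of the migration, the first removed inline item would carry over roughly as follows, assuming the old bibitem keys are kept unchanged:

@article{wang2023bitnet,
  author  = {Wang, H. and others},
  title   = {BitNet: Scaling 1-bit Transformers for Large Language Models},
  journal = {arXiv preprint arXiv:2310.11453},
  year    = {2023}
}

With entries like this in place, existing \cite{wang2023bitnet} commands resolve exactly as they did against the inline list, so no citing text needs to change.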