I am David Grangier, welcome to my homepage. I am currently with the Machine Learning Group at Apple Research, in Cupertino, California. I am interested in large-scale Machine Learning and its application to pattern analysis tasks, such as Information Retrieval, Speech Recognition and Natural Language Processing. My research is described below, while my resume and my LinkedIn profile give an overview of my previous experience.
@inproceedings{grangier2025crisp,
  author    = {Grangier, David and Fan, Simin and Seto, Skyler and Ablin, Pierre},
  title     = {Task-Adaptive Pretrained Language Models via Clustered-Importance Sampling},
  booktitle = {Proceedings of the International Conference on Learning Representations (ICLR)},
  year      = {2025},
  url       = {https://doi.org/10.48550/arXiv.2410.03735}
}
% ICLR 2025 paper. The optimizer name is brace-protected so that
% sentence-casing styles do not flatten its mixed capitalisation.
@inproceedings{pagliardini2025ademamix,
  author    = {Matteo Pagliardini and Pierre Ablin and David Grangier},
  title     = {The {AdEMAMix} Optimizer: Better, Faster, Older},
  booktitle = {Proceedings of the International Conference on Learning Representations (ICLR)},
  year      = {2025},
  url       = {https://doi.org/10.48550/arXiv.2409.03137}
}
@inproceedings{filippova2025no,
  author    = {Anastasiia Filippova and Angelos Katharopoulos and David Grangier and Ronan Collobert},
  title     = {No Need to Talk: Asynchronous Mixture of Language Models},
  booktitle = {Proceedings of the International Conference on Learning Representations (ICLR)},
  year      = {2025},
  url       = {https://arxiv.org/abs/2410.03529}
}
@article{ablin2025soup,
  author  = {Pierre Ablin and Angelos Katharopoulos and Skyler Seto and David Grangier},
  title   = {Soup-of-Experts: Pretraining Specialist Models via Parameters Averaging},
  journal = {arXiv},
  volume  = {2502.01804},
  year    = {2025},
  url     = {https://doi.org/10.48550/arXiv.2502.01804}
}
@article{fan2024dga,
  author  = {Simin Fan and David Grangier and Pierre Ablin},
  title   = {Dynamic Gradient Alignment for Online Data Mixing},
  journal = {arXiv},
  volume  = {2410.02498},
  year    = {2024},
  url     = {https://doi.org/10.48550/arXiv.2410.02498}
}
@article{grangier2024slm,
  author  = {David Grangier and Angelos Katharopoulos and Pierre Ablin and Awni Hannun},
  title   = {Need a Small Specialized Language Model? Plan Early!},
  journal = {arXiv},
  volume  = {2402.01093},
  year    = {2024},
  url     = {https://doi.org/10.48550/arXiv.2402.01093}
}
% arXiv preprint. The abbreviation in the title is brace-protected so that
% sentence-casing styles do not lowercase it.
@article{seto2024bilingual,
  author  = {Seto, Skyler and ter Hoeve, Maartje and Bai, He and Schluter, Natalie and Grangier, David},
  title   = {Training Bilingual {LMs} with Data Constraints in the Targeted Language},
  journal = {arXiv},
  volume  = {2411.12986},
  year    = {2024},
  url     = {https://arxiv.org/abs/2411.12986}
}
@inproceedings{grangier2024projected,
  author    = {Grangier, David and Katharopoulos, Angelos and Ablin, Pierre and Hannun, Awni},
  title     = {Projected Language Models: A Large Model Pre-Segmented Into Smaller Ones},
  booktitle = {ICML 2024 FM-Wild Workshop},
  year      = {2024},
  url       = {https://openreview.net/forum?id=Wi88giKi7N}
}
% NeurIPS 2024 paper. The acronym in the title is brace-protected against
% sentence-casing styles.
@inproceedings{huang2024aggregate,
  author    = {Chen Huang and Skyler Seto and Samira Abnar and David Grangier and Navdeep Jaitly and Josh Susskind},
  title     = {Aggregate-and-Adapt Natural Language Prompts for Downstream Generalization of {CLIP}},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2024},
  url       = {https://neurips.cc/virtual/2024/poster/94659}
}
@inproceedings{maini2024rephrase,
  author    = {Pratyush Maini and Skyler Seto and Richard He Bai and David Grangier and Yizhe Zhang and Navdeep Jaitly},
  title     = {Rephrasing the Web: {A} Recipe for Compute and Data-Efficient Language Modeling},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (ACL)},
  pages     = {14044--14072},
  year      = {2024}
}
@article{grangier2024adaptive,
  author  = {Grangier, David and Ablin, Pierre and Hannun, Awni},
  title   = {Adaptive Training Distributions with Scalable Online Bilevel Optimization},
  journal = {Transactions on Machine Learning Research (TMLR)},
  year    = {2024},
  url     = {https://openreview.net/forum?id=JP1GVyF5i5}
}
% Journal article. The page range uses an en-dash (double hyphen), the empty
% issue-number field is removed, and the model name in the title is
% brace-protected against sentence-casing styles.
@article{audio-lm-generation-2023,
  author   = {Borsos, Zalán and Marinier, Raphaël and Vincent, Damien and Kharitonov, Eugene and Pietquin, Olivier and Sharifi, Matt and Roblek, Dominik and Teboul, Olivier and Grangier, David and Tagliasacchi, Marco and Zeghidour, Neil},
  title    = {{AudioLM}: A Language Modeling Approach to Audio Generation},
  journal  = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume   = {31},
  pages    = {2523--2533},
  year     = {2023},
  keywords = {Semantics;Acoustics;Training;Computational modeling;Codecs;Predictive models;Task analysis;Computer generated music;speech synthesis},
  doi      = {10.1109/TASLP.2023.3288409}
}
@inproceedings{dery-grangier-hannun-structured-pruning,
  author    = {Lucio Dery and David Grangier and Awni Hannun},
  title     = {Transfer Learning for Structured Pruning under Limited Task Data},
  booktitle = {Third Workshop on Efficient Natural Language and Speech Processing (ENLSP-III)},
  year      = {2023}
}
@inproceedings{grangier-ablin-hannun-bilevel-learn-lm-distribution,
  author    = {David Grangier and Pierre Ablin and Awni Hannun},
  title     = {Bilevel Optimization to Learn Training Distributions for Language Modeling under Domain Shift},
  booktitle = {NeurIPS 2023 Workshop on Distribution Shifts},
  year      = {2023}
}
@inproceedings{grangier2022tradeoffs,
  author    = {David Grangier and Dan Iter},
  title     = {The Trade-offs of Domain Adaptation for Neural Language Models},
  booktitle = {Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2022}
}
@inproceedings{freitag-natural-diet-translation-2022,
  author    = {Markus Freitag and David Vilar and David Grangier and Colin Cherry and George Foster},
  title     = {A Natural Diet: Towards Improving Naturalness of Machine Translation},
  booktitle = {Findings of the Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2022}
}
% ICLR 2022 paper. The venue name is spelled "Representations", matching the
% conference's own name.
@inproceedings{riad-learning-strides-convnets-2022,
  author    = {Rachid Riad and Olivier Teboul and David Grangier and Neil Zeghidour},
  title     = {Learning Strides in Convolutional Neural Networks},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2022}
}
@article{freitag-etal-2022-high,
  author  = {Freitag, Markus and Grangier, David and Tan, Qijun and Liang, Bowen},
  editor  = {Roark, Brian and Nenkova, Ani},
  title   = {High Quality Rather than High Model Probability: Minimum {B}ayes Risk Decoding with Neural Metrics},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {10},
  year    = {2022},
  url     = {https://aclanthology.org/2022.tacl-1.47/}
}
@article{freitag2021experts,
  author  = {Markus Freitag and George Foster and David Grangier and Viresh Ratnakar and Qijun Tan and Wolfgang Macherey},
  title   = {Experts, Errors, and Context: A Large-Scale Study of Human Evaluation for Machine Translation},
  journal = {Transactions of the Association for Computational Linguistics (TACL)},
  year    = {2021}
}
% ASRU 2021 paper. The system name is brace-protected so that sentence-casing
% styles do not turn it into "Dive".
@inproceedings{zeghidour-teboul-grangier-dive-2021,
  author    = {Neil Zeghidour and Olivier Teboul and David Grangier},
  title     = {{DIVE}: End-to-end Speech Diarization via Iterative Speaker Embedding},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop ({ASRU})},
  year      = {2021}
}
% ICLR 2021 paper. The venue name is spelled "Representations", matching the
% conference's own name.
@inproceedings{ldery-aux-taks-iclr21,
  author    = {Lucio Dery and Yann Dauphin and David Grangier},
  title     = {Auxiliary Task Update Decomposition: The Good, The Bad and The Neutral},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2021}
}
% Journal article. The journal name is written as "IEEE/ACM Transactions on
% Audio, Speech, and Language Processing" (plural "Transactions").
@article{zeghidour-grangier-wavesplit-2021,
  author  = {Neil Zeghidour and David Grangier},
  title   = {Wavesplit: End-to-End Speech Separation by Speaker Clustering},
  journal = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing ({TASLP})},
  year    = {2021}
}
@inproceedings{saeeds-contrastive-audio-2021,
  author    = {Aaqib Saeed and David Grangier and Neil Zeghidour},
  title     = {Contrastive Learning of General-Purpose Audio Representations},
  booktitle = {International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2021}
}
% Conference paper (the venue is given as a booktitle), so the entry type is
% inproceedings; under the article type the booktitle field is ignored.
% The acronym in the title is brace-protected against sentence-casing styles.
@inproceedings{saeeds-eeg-reordering-2021,
  author    = {Aaqib Saeed and David Grangier and Olivier Pietquin and Neil Zeghidour},
  title     = {Learning from Heterogeneous {EEG} Signals with Differentiable Channel Reordering},
  booktitle = {International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2021}
}
@misc{iter2021complementarity,
  author        = {Dan Iter and David Grangier},
  title         = {On the Complementarity of Data Selection and Fine Tuning for Domain Adaptation},
  year          = {2021},
  eprint        = {2109.07591},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
@misc{marchisio2021unsupervised,
  author        = {Kelly Marchisio and Markus Freitag and David Grangier},
  title         = {What Can Unsupervised Machine Translation Contribute to High-Resource Language Pairs?},
  year          = {2021},
  eprint        = {2106.15818},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
% Journal article. The journal value carries no trailing whitespace, which
% could otherwise leak into the formatted reference.
@article{aurkoroy-routing-transformer-2020,
  author  = {Aurko Roy and Mohammad Saffar and Ashish Vaswani and David Grangier},
  title   = {Efficient Content-Based Sparse Attention with Routing Transformers},
  journal = {Transactions of the Association for Computational Linguistics (TACL)},
  year    = {2020}
}
@inproceedings{freitag-paraphrase-mert-2020,
  author    = {Markus Freitag and George Foster and David Grangier and Colin Cherry},
  title     = {Human-Paraphrased References Improve Neural Machine Translation},
  booktitle = {Conference on Machine Translation (WMT)},
  year      = {2020}
}
% Conference paper (the venue is given as a booktitle), so the entry type is
% inproceedings; under the article type the booktitle field is ignored.
% The metric name in the title is brace-protected against sentence-casing.
@inproceedings{freitag-bleu-paraphrase-references-2020,
  author    = {Markus Freitag and David Grangier and Isaac Caswell},
  title     = {{BLEU} might be Guilty but References are not Innocent},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2020}
}
% Conference paper (the venue is given as a booktitle), so the entry type is
% inproceedings; under the article type the booktitle field is ignored.
% The quoted word in the title uses TeX opening/closing quotes.
@inproceedings{riley-translationese-2020,
  author    = {Parker Riley and Isaac Caswell and Markus Freitag and David Grangier},
  title     = {Translationese as a Language in {``Multilingual''} {NMT}},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}
@inproceedings{ippolito-storyline-2020,
  author    = {Daphne Ippolito and David Grangier and Douglas Eck and Chris Callison-Burch},
  title     = {Towards Better Storylines with Sentence-Level Language Models},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2020}
}
@inproceedings{fan-etal-2019-eli5,
  author    = {Fan, Angela and Jernite, Yacine and Perez, Ethan and Grangier, David and Weston, Jason and Auli, Michael},
  title     = {{ELI}5: Long Form Question Answering},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/P19-1346}
}
@inproceedings{caswell:tagbt:2019,
  author    = {Isaac Caswell and Ciprian Chelba and David Grangier},
  title     = {Tagged Back-Translation},
  booktitle = {Conference on Machine Translation (WMT)},
  year      = {2019}
}
@inproceedings{aurkoroy:paraphrase:2019,
  author    = {Aurko Roy and David Grangier},
  title     = {Unsupervised Paraphrasing without Translation},
  booktitle = {Conference of the Association for Computational Linguistics (ACL)},
  year      = {2019}
}
% NAACL 2019 demo paper. Authors are separated by " and " (BibTeX does not
% treat commas as name separators), and the lowercase toolkit name is
% brace-protected so styles keep its casing.
@inproceedings{ottedunov:fairseq:2019,
  author    = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli},
  title     = {{fairseq}: A Fast, Extensible Toolkit for Sequence Modeling},
  booktitle = {Demo of Conference of the North American Chapter of the Association for Computational Linguistics (NAACL Demo)},
  year      = {2019}
}
@inproceedings{ippolito-etal-2019-unsupervised,
  author    = {Ippolito, Daphne and Grangier, David and Callison-Burch, Chris and Eck, Douglas},
  title     = {Unsupervised Hierarchical Story Infilling},
  booktitle = {Proceedings of the First Workshop on Narrative Understanding @ NAACL},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/W19-2405}
}
% CVPR paper. The citation key says 2018 while the year field says 2019; the
% key is kept unchanged so existing citations still resolve.
% "3D" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{pavllo:3dpose:2018,
  author    = {Dario Pavllo and Christoph Feichtenhofer and David Grangier and Michael Auli},
  title     = {{3D} human pose estimation in video with temporal convolutions and semi-supervised training},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2019}
}
% Conference paper (the venue is given as a booktitle), so the entry type is
% inproceedings. The first two authors are separated by " and "; without it
% BibTeX parses "Sergey Edunov Myle Ott" as a single person.
@inproceedings{edunov:backtranslation:2018,
  author    = {Sergey Edunov and Myle Ott and Michael Auli and David Grangier},
  title     = {Understanding Back-Translation at Scale},
  booktitle = {Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  year      = {2018}
}
@inproceedings{ott:scaling:2018,
  author    = {Myle Ott and Sergey Edunov and David Grangier and Michael Auli},
  title     = {Scaling Neural Machine Translation},
  booktitle = {Workshop on Machine Translation ({WMT@EMNLP})},
  year      = {2018}
}
% BMVC 2018 paper. The model name is brace-protected so that sentence-casing
% styles do not flatten its internal capital.
@inproceedings{pavllo:quaternet:2018,
  author    = {Dario Pavllo and David Grangier and Michael Auli},
  title     = {{QuaterNet}: A Quaternion-based Recurrent Model for Human Motion},
  booktitle = {British Machine Vision Conference (BMVC)},
  year      = {2018}
}
% ICML 2018 paper. The apostrophe in "Marc'Aurelio" is plain ASCII; the
% typographic (non-ASCII) apostrophe is not safe under classic 8-bit BibTeX.
@inproceedings{ott:uncertainty:2018,
  author    = {Myle Ott and Michael Auli and David Grangier and Marc'Aurelio Ranzato},
  title     = {Analyzing Uncertainty in Neural Machine Translation},
  booktitle = {International Conference on Machine Learning {(ICML)}},
  year      = {2018}
}
@inproceedings{fan:summarization:2018,
  author    = {Angela Fan and David Grangier and Michael Auli},
  title     = {Controllable Abstractive Summarization},
  booktitle = {ACL Workshop on Neural Machine Translation and Generation (NMT@ACL)},
  year      = {2018}
}
% NAACL 2018 paper. The apostrophe in "Marc'Aurelio" is plain ASCII; the
% typographic (non-ASCII) apostrophe is not safe under classic 8-bit BibTeX.
@inproceedings{edunovott:structured:2018,
  author    = {Sergey Edunov and Myle Ott and Michael Auli and David Grangier and Marc'Aurelio Ranzato},
  title     = {Classical Structured Prediction Losses for Sequence to Sequence Learning},
  booktitle = {Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year      = {2018}
}
% Conference paper (the venue is given as a booktitle), so the entry type is
% inproceedings; under the article type the booktitle field is ignored.
% The system name is brace-protected against sentence-casing styles.
@inproceedings{grangier:quickedit:2018,
  author    = {David Grangier and Michael Auli},
  title     = {{QuickEdit}: Editing Text and Translations by Crossing Words Out},
  booktitle = {Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year      = {2018}
}
% ICML 2017 paper. Authors are separated by " and "; BibTeX does not treat
% commas as separators between names.
@inproceedings{gehring:convs2s:2017,
  author    = {Jonas Gehring and Michael Auli and David Grangier and Denis Yarats and Yann N. Dauphin},
  title     = {Convolutional Sequence to Sequence Learning},
  booktitle = {International Conference on Machine Learning {(ICML)}},
  year      = {2017}
}
@inproceedings{dauphin:gatedlm:2017,
  author    = {Yann N. Dauphin and Angela Fan and Michael Auli and David Grangier},
  title     = {Language Modeling with Gated Convolutional Networks},
  booktitle = {International Conference on Machine Learning {(ICML)}},
  year      = {2017}
}
% ICML 2017 paper. NOTE(review): the title was previously a verbatim copy of
% the title of chen:acl:2016 (a different paper by different authors); it is
% replaced here with the title this work is believed to carry -- please
% verify against the published proceedings.
@inproceedings{grave:softmax:2017,
  author    = {Edouard Grave and Armand Joulin and Moustapha Cisse and David Grangier and Herve Jegou},
  title     = {Efficient Softmax Approximation for {GPUs}},
  booktitle = {International Conference on Machine Learning {(ICML)}},
  year      = {2017}
}
@inproceedings{gehring:convnmt:2017,
  author    = {Jonas Gehring and Michael Auli and David Grangier and Yann N. Dauphin},
  title     = {A Convolutional Encoder Model for Neural Machine Translation},
  booktitle = {Conference of the Association for Computational Linguistics ({ACL})},
  year      = {2017}
}
% arXiv preprint. The venue is stored in the journal field (required by the
% article type) rather than in booktitle, which the article type ignores.
% NOTE(review): no arXiv identifier is recorded here; add it once confirmed.
@article{novak:refinement:2016,
  author  = {Roman Novak and Michael Auli and David Grangier},
  title   = {Iterative Refinement for Machine Translation},
  journal = {arXiv},
  year    = {2016}
}
% arXiv preprint. The venue is stored in the journal field (required by the
% article type) rather than in booktitle, which the article type ignores.
% NOTE(review): no arXiv identifier is recorded here; add it once confirmed.
@article{lhostis:selectionmt:2016,
  author  = {Gurvan L'Hostis and David Grangier and Michael Auli},
  title   = {Vocabulary Selection Strategies for Neural Machine Translation},
  journal = {arXiv},
  year    = {2016}
}
@inproceedings{lebret:emnlp:2016,
  author    = {Remi Lebret and David Grangier and Michael Auli},
  title     = {Neural Generation of Text from Structured Data with Application to the Bibliography Domain},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2016}
}
@inproceedings{chen:acl:2016,
  author    = {Wenlin Chen and David Grangier and Michael Auli},
  title     = {Strategies for Training Large Vocabulary Neural Language Models},
  booktitle = {Conference of the Association for Computational Linguistics (ACL)},
  year      = {2016}
}
% ICML 2016 workshop paper. The booktitle value carries no leading
% whitespace, which could otherwise leak into the formatted reference.
@inproceedings{jandot:2016:whi,
  author    = {Camille Jandot and Patrice Simard and Max Chickering and David Grangier and Jina Suh},
  title     = {Interactive Semantic Featuring for Text Classification},
  booktitle = {ICML Workshop on Human Interpretability in Machine Learning (WHI)},
  year      = {2016}
}
% ICLR 2016 paper. The venue name is spelled "Representations", matching the
% conference's own name.
@inproceedings{dauphin:2016:iclr,
  author    = {Yann N. Dauphin and David Grangier},
  title     = {Predicting distributions with Linearizing Belief Networks},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2016}
}
@article{simard:2014:ice,
  author    = {Patrice Y. Simard and David Maxwell Chickering and Aparna Lakshmiratan and Denis Xavier Charles and L{\'{e}}on Bottou and Carlos Garcia Jurado Suarez and David Grangier and Saleema Amershi and Johan Verwey and Jina Suh},
  title     = {{ICE:} Enabling Non-Experts to Build Models Interactively for Large-Scale Lopsided Problems},
  journal   = {CoRR},
  volume    = {abs/1409.4814},
  year      = {2014},
  url       = {http://arxiv.org/abs/1409.4814},
  timestamp = {Thu, 02 Oct 2014 07:52:03 +0200},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/SimardCLCBSGAVS14},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}
% Journal article. The acronym opening the title is brace-protected so that
% sentence-casing styles do not turn it into "Gbc".
@article{shi:2013:gbc,
  author    = {Shi, Xiaoxiao and Paiement, Jean-Francois and Grangier, David and Yu, Philip S},
  title     = {{GBC}: gradient boosting consensus model for heterogeneous data},
  journal   = {Statistical Analysis and Data Mining},
  publisher = {Wiley Online Library},
  year      = {2013}
}
@inproceedings{shi:2012:heterogeneous_gbdt_sdm,
  author    = {X. Shi and JF. Paiement and D. Grangier and P. Yu},
  title     = {Learning from Heterogeneous Sources via Gradient Boosting Consensus},
  booktitle = {SIAM International Conference on Data Mining (SDM)},
  year      = {2012}
}
@inproceedings{grangier:2010:missing_nips,
  author    = {D. Grangier and I. Melvin},
  title     = {Feature Set Embedding for Incomplete Data},
  booktitle = {Advances in Neural Information Processing Systems (NIPS)},
  year      = {2010}
}
@inproceedings{weston:2010:label_trees_nips,
  author    = {J. Weston and S. Bengio and D. Grangier},
  title     = {Label Embedding Trees for Large Multi-Class Tasks},
  booktitle = {Advances in Neural Information Processing Systems (NIPS)},
  year      = {2010}
}
@inproceedings{bai:2010:halftrans_aistats,
  author    = {B. Bai and J. Weston and D. Grangier and R. Collobert and C. Cortes and M. Mohri},
  title     = {Half Transductive Ranking},
  booktitle = {Artificial Intelligence and Statistics (AISTATS)},
  year      = {2010}
}
@inproceedings{bai:2009:halftrans_nips,
  author    = {B. Bai and J. Weston and D. Grangier and R. Collobert and C. Cortes and M. Mohri},
  title     = {Ranking with Half Transductive Models},
  booktitle = {NIPS Workshop on Advances in Ranking},
  year      = {2009}
}
@inproceedings{bai:2009:psi_nips,
  author    = {B. Bai and J. Weston and D. Grangier and R. Collobert and K. Sadamasa and Y. Qi and C. Cortes and M. Mohri},
  title     = {Polynomial Semantic Indexing},
  booktitle = {Advances in Neural Information Processing Systems (NIPS)},
  year      = {2009}
}
@article{bai:2009:ssi_jir,
  author    = {B. Bai and J. Weston and D. Grangier and R. Collobert and Y. Qi and K. Sadamasa and O. Chapelle and K. Weinberger},
  title     = {Learning to Rank with (a Lot of) Word Features},
  journal   = {Information Retrieval -- Special Issue on Learning to Rank},
  publisher = {Springer},
  year      = {2009}
}
@inproceedings{bai:2009:ssi_cikm,
  author    = {B. Bai and J. Weston and D. Grangier and R. Collobert and Y. Qi and K. Sadamasa and O. Chapelle and K. Weinberger},
  title     = {Supervised Semantic Indexing},
  booktitle = {ACM Conference on Information and Knowledge Management (CIKM)},
  year      = {2009}
}
% ECIR 2009 paper. A space separates the initial from the surname in
% "J. Weston"; without it BibTeX treats "J.Weston" as a single surname token.
@inproceedings{bai:2009:ssi_ecir,
  author    = {B. Bai and J. Weston and R. Collobert and D. Grangier},
  title     = {Supervised Semantic Indexing},
  booktitle = {European Conference on Information Retrieval (ECIR)},
  year      = {2009}
}
% Book chapter. All authors are separated by " and "; a comma between two
% names makes BibTeX parse them as one "Last, First" name.
@incollection{grangier:2009:kws_book,
  author    = {D. Grangier and J. Keshet and S. Bengio},
  title     = {Discriminative Keyword Spotting},
  booktitle = {Automatic Speech and Speaker Recognition: Large Margin and Kernel Methods},
  editor    = {J. Keshet and S. Bengio},
  publisher = {Wiley},
  year      = {2009}
}
@phdthesis{grangier:2008:phd_thesis,
  author = {D. Grangier},
  title  = {Machine Learning for Information Retrieval},
  school = {Ecole Polytechnique Federale de Lausanne},
  number = {4088},
  year   = {2008}
}
@article{grangier:2008:kws_journal,
  author  = {J. Keshet and D. Grangier and S. Bengio},
  title   = {Discriminative Keyword Spotting},
  journal = {Speech Communication},
  year    = {2008}
}
@article{grangier:2008:tpami,
  author  = {D. Grangier and S. Bengio},
  title   = {A Discriminative Kernel-based Model to Rank Images from Text Queries},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year    = {2008}
}
@inproceedings{grangier:2007:rr_07-15,
  author    = {D. Grangier and S. Bengio},
  title     = {Learning the Inter-frame Distance for Discriminative Template-based Keyword Detection},
  booktitle = {International Conference on Speech Processing (INTERSPEECH)},
  year      = {2007}
}
% NOLISP 2007 workshop paper. All authors are separated by " and "; a comma
% between two names makes BibTeX parse them as one "Last, First" name.
@inproceedings{keshet:2007:nolisp,
  author    = {J. Keshet and D. Grangier and S. Bengio},
  title     = {Discriminative Keyword Spotting},
  booktitle = {International Workshop on Non-LInear Speech Processing (NOLISP)},
  year      = {2007}
}
% ICANN 2006 paper. The booktitle has balanced braces; a stray closing brace
% inside the value breaks BibTeX parsing of this entry and can affect the
% entries that follow.
@inproceedings{grangier:2006:icann,
  author    = {D. Grangier and S. Bengio},
  title     = {A Neural Network to Retrieve Images from Text Queries},
  booktitle = {International Conference on Artificial Neural Networks (ICANN)},
  year      = {2006}
}
@inproceedings{grangier:2006:amr,
  author    = {D. Grangier and F. Monay and S. Bengio},
  title     = {Learning to Retrieve Images from Text Queries with a Discriminative Model},
  booktitle = {International Workshop on Adaptive Multimedia Retrieval (AMR)},
  year      = {2006}
}
@inproceedings{grangier:2006:ecml,
  author    = {D. Grangier and F. Monay and S. Bengio},
  title     = {A Discriminative Approach for the Retrieval of Images from Text Queries},
  booktitle = {European Conference on Machine Learning (ECML)},
  year      = {2006}
}
@techreport{grangier:2005:idiap-05-67,
  author      = {D. Grangier and S. Bengio},
  title       = {A Discriminative Decoder for the Recognition of Phoneme Sequences},
  institution = {IDIAP},
  number      = {67},
  year        = {2005}
}
@inproceedings{grangier:2005:nips_workshop,
  author    = {D. Grangier and S. Bengio},
  title     = {Exploiting Hyperlinks to Learn a Retrieval Model},
  booktitle = {NIPS Workshop on Learning to Rank},
  year      = {2005}
}
@inproceedings{grangier:2005:cikm,
  author    = {D. Grangier and S. Bengio},
  title     = {Inferring Document Similarity from Hyperlinks},
  booktitle = {ACM Conference on Information and Knowledge Management (CIKM)},
  year      = {2005}
}
@inproceedings{grangier:2005:icme,
  author    = {D. Grangier and A. Vinciarelli},
  title     = {Effect of Segmentation Method on Video Retrieval Performance},
  booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
  year      = {2005}
}
@techreport{grangier:2004:idiap-04-82,
  author      = {D. Grangier and A. Vinciarelli},
  title       = {Effect of Recognition Errors on Text Clustering},
  institution = {IDIAP},
  number      = {82},
  year        = {2004}
}
@techreport{com-03-08,
  author      = {D. Grangier and A. Vinciarelli and H. Bourlard},
  title       = {Information Retrieval on Noisy Text},
  institution = {IDIAP},
  number      = {8},
  year        = {2003},
  keywords    = {Information Retrieval, Speech, Spoken Documents Retrieval, Noisy Text}
}