mirror of
				https://github.com/huggingface/transformers.git
				synced 2025-10-21 01:23:56 +08:00 
			
		
		
		
	Compare commits
	
		
			12 Commits
		
	
	
		
			check-send
			...
			v4.33.3
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| bffac926ca | |||
| 0e2cf025fa | |||
| c9b9d87e93 | |||
| 7c9a1b6ade | |||
| 6da93f5580 | |||
| b033d1a679 | |||
| 2ba46c140b | |||
| 8160e4270b | |||
| 118c676ef3 | |||
| 5a4f340df7 | |||
| 5a6fa3ef09 | |||
| 19a323ee4a | 
							
								
								
									
										4
									
								
								.github/conda/meta.yaml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/conda/meta.yaml
									
									
									
									
										vendored
									
									
								
							| @ -26,6 +26,8 @@ requirements: | ||||
|     - protobuf | ||||
|     - tokenizers >=0.11.1,!=0.11.3,<0.13 | ||||
|     - pyyaml >=5.1 | ||||
|     - safetensors | ||||
|     - fsspec | ||||
|   run: | ||||
|     - python | ||||
|     - numpy >=1.17 | ||||
| @ -40,6 +42,8 @@ requirements: | ||||
|     - protobuf | ||||
|     - tokenizers >=0.11.1,!=0.11.3,<0.13 | ||||
|     - pyyaml >=5.1 | ||||
|     - safetensors | ||||
|     - fsspec | ||||
|  | ||||
| test: | ||||
|   imports: | ||||
|  | ||||
| @ -435,7 +435,7 @@ Current number of checkpoints: ** (from Google) released with the paper [Pix2Struct: Screenshot Parsing as Pretraining for Visual Language Understanding](https://arxiv.org/abs/2210.03347) by Kenton Lee, Mandar Joshi, Iulia Turc, Hexiang Hu, Fangyu Liu, Julian Eisenschlos, Urvashi Khandelwal, Peter Shaw, Ming-Wei Chang, Kristina Toutanova. | ||||
| 1. **[PLBart](https://huggingface.co/docs/transformers/model_doc/plbart)** (from UCLA NLP) released with the paper [Unified Pre-training for Program Understanding and Generation](https://arxiv.org/abs/2103.06333) by Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, Kai-Wei Chang. | ||||
| 1. **[PoolFormer](https://huggingface.co/docs/transformers/model_doc/poolformer)** (from Sea AI Labs) released with the paper [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) by Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng. | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/main/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi and Kyogu Lee. | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi and Kyogu Lee. | ||||
| 1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou. | ||||
| 1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao. | ||||
| 1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius. | ||||
| @ -486,10 +486,10 @@ Current number of checkpoints: ](https://huggingface.co/docs/transformers/model_doc/vit)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. | ||||
| 1. **[VisualBERT](https://huggingface.co/docs/transformers/model_doc/visual_bert)** (from UCLA NLP) released with the paper [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://arxiv.org/pdf/1908.03557) by Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang. | ||||
| 1. **[ViT Hybrid](https://huggingface.co/docs/transformers/model_doc/vit_hybrid)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/main/model_doc/vitdet)** (from Meta AI) released with the paper [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) by Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/model_doc/vitdet)** (from Meta AI) released with the paper [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) by Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. | ||||
| 1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (from Meta AI) released with the paper [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) by Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick. | ||||
| 1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (from Meta AI) released with the paper [Masked Siamese Networks for Label-Efficient Learning](https://arxiv.org/abs/2204.07141) by Mahmoud Assran, Mathilde Caron, Ishan Misra, Piotr Bojanowski, Florian Bordes, Pascal Vincent, Armand Joulin, Michael Rabbat, Nicolas Ballas. | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/main/model_doc/vits)** (from Kakao Enterprise) released with the paper [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) by Jaehyeon Kim, Jungil Kong, Juhee Son. | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (from Kakao Enterprise) released with the paper [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) by Jaehyeon Kim, Jungil Kong, Juhee Son. | ||||
| 1. **[ViViT](https://huggingface.co/docs/transformers/model_doc/vivit)** (from Google Research) released with the paper [ViViT: A Video Vision Transformer](https://arxiv.org/abs/2103.15691) by Anurag Arnab, Mostafa Dehghani, Georg Heigold, Chen Sun, Mario Lučić, Cordelia Schmid. | ||||
| 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (from Facebook AI) released with the paper [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli. | ||||
| 1. **[Wav2Vec2-Conformer](https://huggingface.co/docs/transformers/model_doc/wav2vec2-conformer)** (from Facebook AI) released with the paper [FAIRSEQ S2T: Fast Speech-to-Text Modeling with FAIRSEQ](https://arxiv.org/abs/2010.05171) by Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Sravya Popuri, Dmytro Okhonko, Juan Pino. | ||||
|  | ||||
| @ -412,7 +412,7 @@ Número actual de puntos de control: ** (from Google) released with the paper [Pix2Struct: Screenshot Parsing as Pretraining for Visual Language Understanding](https://arxiv.org/abs/2210.03347) by Kenton Lee, Mandar Joshi, Iulia Turc, Hexiang Hu, Fangyu Liu, Julian Eisenschlos, Urvashi Khandelwal, Peter Shaw, Ming-Wei Chang, Kristina Toutanova. | ||||
| 1. **[PLBart](https://huggingface.co/docs/transformers/model_doc/plbart)** (from UCLA NLP) released with the paper [Unified Pre-training for Program Understanding and Generation](https://arxiv.org/abs/2103.06333) by Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, Kai-Wei Chang. | ||||
| 1. **[PoolFormer](https://huggingface.co/docs/transformers/model_doc/poolformer)** (from Sea AI Labs) released with the paper [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) by Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng. | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/main/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou. | ||||
| 1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao. | ||||
| 1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius. | ||||
| @ -463,10 +463,10 @@ Número actual de puntos de control: ](https://huggingface.co/docs/transformers/model_doc/vit)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. | ||||
| 1. **[VisualBERT](https://huggingface.co/docs/transformers/model_doc/visual_bert)** (from UCLA NLP) released with the paper [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://arxiv.org/pdf/1908.03557) by Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang. | ||||
| 1. **[ViT Hybrid](https://huggingface.co/docs/transformers/model_doc/vit_hybrid)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/main/model_doc/vitdet)** (from Meta AI) released with the paper [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) by Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/model_doc/vitdet)** (from Meta AI) released with the paper [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) by Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. | ||||
| 1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (from Meta AI) released with the paper [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) by Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick. | ||||
| 1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (from Meta AI) released with the paper [Masked Siamese Networks for Label-Efficient Learning](https://arxiv.org/abs/2204.07141) by Mahmoud Assran, Mathilde Caron, Ishan Misra, Piotr Bojanowski, Florian Bordes, Pascal Vincent, Armand Joulin, Michael Rabbat, Nicolas Ballas. | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/main/model_doc/vits)** (from Kakao Enterprise) released with the paper [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) by Jaehyeon Kim, Jungil Kong, Juhee Son. | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (from Kakao Enterprise) released with the paper [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) by Jaehyeon Kim, Jungil Kong, Juhee Son. | ||||
| 1. **[ViViT](https://huggingface.co/docs/transformers/model_doc/vivit)** (from Google Research) released with the paper [ViViT: A Video Vision Transformer](https://arxiv.org/abs/2103.15691) by Anurag Arnab, Mostafa Dehghani, Georg Heigold, Chen Sun, Mario Lučić, Cordelia Schmid. | ||||
| 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (from Facebook AI) released with the paper [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli. | ||||
| 1. **[Wav2Vec2-Conformer](https://huggingface.co/docs/transformers/model_doc/wav2vec2-conformer)** (from Facebook AI) released with the paper [FAIRSEQ S2T: Fast Speech-to-Text Modeling with FAIRSEQ](https://arxiv.org/abs/2010.05171) by Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Sravya Popuri, Dmytro Okhonko, Juan Pino. | ||||
|  | ||||
| @ -384,7 +384,7 @@ conda install -c huggingface transformers | ||||
| 1. **[Pix2Struct](https://huggingface.co/docs/transformers/model_doc/pix2struct)** (Google से) Kenton Lee, Mandar Joshi, Iulia Turc, Hexiang Hu, Fangyu Liu, Julian Eisenschlos, Urvashi Khandelwal, Peter Shaw, Ming-Wei Chang, Kristina Toutanova. द्वाराअनुसंधान पत्र [Pix2Struct: Screenshot Parsing as Pretraining for Visual Language Understanding](https://arxiv.org/abs/2210.03347) के साथ जारी किया गया | ||||
| 1. **[PLBart](https://huggingface.co/docs/transformers/model_doc/plbart)** (UCLA NLP से) साथ वाला पेपर [प्रोग्राम अंडरस्टैंडिंग एंड जेनरेशन के लिए यूनिफाइड प्री-ट्रेनिंग](https://arxiv.org/abs/2103.06333) वसी उद्दीन अहमद, सैकत चक्रवर्ती, बैशाखी रे, काई-वेई चांग द्वारा। | ||||
| 1. **[PoolFormer](https://huggingface.co/docs/transformers/model_doc/poolformer)** (from Sea AI Labs) released with the paper [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) by Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng. | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/main/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (माइक्रोसॉफ्ट रिसर्च से) साथ में पेपर [ProphetNet: प्रेडिक्टिंग फ्यूचर एन-ग्राम फॉर सीक्वेंस-टू-सीक्वेंस प्री-ट्रेनिंग ](https://arxiv.org/abs/2001.04063) यू यान, वीज़ेन क्यूई, येयुन गोंग, दयाहेंग लियू, नान डुआन, जिउशेंग चेन, रुओफ़ेई झांग और मिंग झोउ द्वारा पोस्ट किया गया। | ||||
| 1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (Nanjing University, The University of Hong Kong etc. से) Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao. द्वाराअनुसंधान पत्र [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) के साथ जारी किया गया | ||||
| 1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (NVIDIA से) साथ वाला पेपर [डीप लर्निंग इंफ़ेक्शन के लिए इंटीजर क्वांटिज़ेशन: प्रिंसिपल्स एंड एम्पिरिकल इवैल्यूएशन](https://arxiv.org/abs/2004.09602) हाओ वू, पैट्रिक जुड, जिआओजी झांग, मिखाइल इसेव और पॉलियस माइकेविसियस द्वारा। | ||||
| @ -435,10 +435,10 @@ conda install -c huggingface transformers | ||||
| 1. **[Vision Transformer (ViT)](https://huggingface.co/docs/transformers/model_doc/vit)** (गूगल एआई से) कागज के साथ [एक इमेज इज़ वर्थ 16x16 वर्ड्स: ट्रांसफॉर्मर्स फॉर इमेज रिकॉग्निशन एट स्केल](https://arxiv.org/abs/2010.11929) एलेक्सी डोसोवित्स्की, लुकास बेयर, अलेक्जेंडर कोलेसनिकोव, डिर्क वीसेनबोर्न, शियाओहुआ झाई, थॉमस अनटरथिनर, मुस्तफा देहघानी, मैथियास मिंडरर, जॉर्ज हेगोल्ड, सिल्वेन गेली, जैकब उस्ज़कोरेइट द्वारा हॉल्सबी द्वारा पोस्ट किया गया। | ||||
| 1. **[VisualBERT](https://huggingface.co/docs/transformers/model_doc/visual_bert)** (UCLA NLP से) साथ वाला पेपर [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://arxiv.org/pdf/1908.03557) लियुनियन हेरोल्ड ली, मार्क यात्स्कर, दा यिन, चो-जुई हसीह, काई-वेई चांग द्वारा। | ||||
| 1. **[ViT Hybrid](https://huggingface.co/docs/transformers/model_doc/vit_hybrid)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/main/model_doc/vitdet)** (Meta AI से) Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. द्वाराअनुसंधान पत्र [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) के साथ जारी किया गया | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/model_doc/vitdet)** (Meta AI से) Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. द्वाराअनुसंधान पत्र [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) के साथ जारी किया गया | ||||
| 1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (मेटा एआई से) साथ में कागज [मास्कड ऑटोएन्कोडर स्केलेबल विजन लर्नर्स हैं](https://arxiv.org/abs/2111.06377) कैमिंग हे, ज़िनेली चेन, सेनिंग ज़ी, यांगहो ली, पिओट्र डॉलर, रॉस गिर्शिक द्वारा। | ||||
| 1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (मेटा एआई से) साथ में कागज [लेबल-कुशल सीखने के लिए मास्क्ड स्याम देश के नेटवर्क](https://arxiv.org/abs/2204.07141) महमूद असरान, मथिल्डे कैरन, ईशान मिश्रा, पियोट्र बोजानोवस्की, फ्लोरियन बोर्डेस, पास्कल विंसेंट, आर्मंड जौलिन, माइकल रब्बत, निकोलस बल्लास द्वारा। | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/main/model_doc/vits)** (Kakao Enterprise से) Jaehyeon Kim, Jungil Kong, Juhee Son. द्वाराअनुसंधान पत्र [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) के साथ जारी किया गया | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (Kakao Enterprise से) Jaehyeon Kim, Jungil Kong, Juhee Son. द्वाराअनुसंधान पत्र [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) के साथ जारी किया गया | ||||
| 1. **[ViViT](https://huggingface.co/docs/transformers/model_doc/vivit)** (from Google Research) released with the paper [ViViT: A Video Vision Transformer](https://arxiv.org/abs/2103.15691) by Anurag Arnab, Mostafa Dehghani, Georg Heigold, Chen Sun, Mario Lučić, Cordelia Schmid. | ||||
| 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (फेसबुक एआई से) साथ में पेपर [wav2vec 2.0: ए फ्रेमवर्क फॉर सेल्फ-सुपरवाइज्ड लर्निंग ऑफ स्पीच रिप्रेजेंटेशन](https://arxiv.org/abs/2006.11477) एलेक्सी बेवस्की, हेनरी झोउ, अब्देलरहमान मोहम्मद, माइकल औली द्वारा। | ||||
| 1. **[Wav2Vec2-Conformer](https://huggingface.co/docs/transformers/model_doc/wav2vec2-conformer)** (Facebook AI से) साथ वाला पेपर [FAIRSEQ S2T: FAIRSEQ के साथ फास्ट स्पीच-टू-टेक्स्ट मॉडलिंग ](https://arxiv.org/abs/2010.05171) चांगहान वांग, यूं तांग, जुताई मा, ऐनी वू, सरव्या पोपुरी, दिमित्रो ओखोनको, जुआन पिनो द्वारा पोस्ट किया गया। | ||||
|  | ||||
| @ -446,7 +446,7 @@ Flax、PyTorch、TensorFlowをcondaでインストールする方法は、それ | ||||
| 1. **[Pix2Struct](https://huggingface.co/docs/transformers/model_doc/pix2struct)** (Google から) Kenton Lee, Mandar Joshi, Iulia Turc, Hexiang Hu, Fangyu Liu, Julian Eisenschlos, Urvashi Khandelwal, Peter Shaw, Ming-Wei Chang, Kristina Toutanova. から公開された研究論文 [Pix2Struct: Screenshot Parsing as Pretraining for Visual Language Understanding](https://arxiv.org/abs/2210.03347) | ||||
| 1. **[PLBart](https://huggingface.co/docs/transformers/model_doc/plbart)** (UCLA NLP から) Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, Kai-Wei Chang から公開された研究論文: [Unified Pre-training for Program Understanding and Generation](https://arxiv.org/abs/2103.06333) | ||||
| 1. **[PoolFormer](https://huggingface.co/docs/transformers/model_doc/poolformer)** (Sea AI Labs から) Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng から公開された研究論文: [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/main/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (Microsoft Research から) Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou から公開された研究論文: [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) | ||||
| 1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (Nanjing University, The University of Hong Kong etc. から) Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao. から公開された研究論文 [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) | ||||
| 1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (NVIDIA から) Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius から公開された研究論文: [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) | ||||
| @ -497,10 +497,10 @@ Flax、PyTorch、TensorFlowをcondaでインストールする方法は、それ | ||||
| 1. **[Vision Transformer (ViT)](https://huggingface.co/docs/transformers/model_doc/vit)** (Google AI から) Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby から公開された研究論文: [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) | ||||
| 1. **[VisualBERT](https://huggingface.co/docs/transformers/model_doc/visual_bert)** (UCLA NLP から) Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang から公開された研究論文: [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://arxiv.org/pdf/1908.03557) | ||||
| 1. **[ViT Hybrid](https://huggingface.co/docs/transformers/model_doc/vit_hybrid)** (Google AI から) Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby から公開された研究論文: [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/main/model_doc/vitdet)** (Meta AI から) Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. から公開された研究論文 [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/model_doc/vitdet)** (Meta AI から) Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. から公開された研究論文 [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) | ||||
| 1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (Meta AI から) Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick から公開された研究論文: [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) | ||||
| 1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (Meta AI から) Mahmoud Assran, Mathilde Caron, Ishan Misra, Piotr Bojanowski, Florian Bordes, Pascal Vincent, Armand Joulin, Michael Rabbat, Nicolas Ballas から公開された研究論文: [Masked Siamese Networks for Label-Efficient Learning](https://arxiv.org/abs/2204.07141) | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/main/model_doc/vits)** (Kakao Enterprise から) Jaehyeon Kim, Jungil Kong, Juhee Son. から公開された研究論文 [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (Kakao Enterprise から) Jaehyeon Kim, Jungil Kong, Juhee Son. から公開された研究論文 [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) | ||||
| 1. **[ViViT](https://huggingface.co/docs/transformers/model_doc/vivit)** (from Google Research) released with the paper [ViViT: A Video Vision Transformer](https://arxiv.org/abs/2103.15691) by Anurag Arnab, Mostafa Dehghani, Georg Heigold, Chen Sun, Mario Lučić, Cordelia Schmid. | ||||
| 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (Facebook AI から) Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli から公開された研究論文: [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) | ||||
| 1. **[Wav2Vec2-Conformer](https://huggingface.co/docs/transformers/model_doc/wav2vec2-conformer)** (Facebook AI から) Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Sravya Popuri, Dmytro Okhonko, Juan Pino から公開された研究論文: [FAIRSEQ S2T: Fast Speech-to-Text Modeling with FAIRSEQ](https://arxiv.org/abs/2010.05171) | ||||
|  | ||||
| @ -361,7 +361,7 @@ Flax, PyTorch, TensorFlow 설치 페이지에서 이들을 conda로 설치하는 | ||||
| 1. **[Pix2Struct](https://huggingface.co/docs/transformers/model_doc/pix2struct)** (Google 에서 제공)은 Kenton Lee, Mandar Joshi, Iulia Turc, Hexiang Hu, Fangyu Liu, Julian Eisenschlos, Urvashi Khandelwal, Peter Shaw, Ming-Wei Chang, Kristina Toutanova.의 [Pix2Struct: Screenshot Parsing as Pretraining for Visual Language Understanding](https://arxiv.org/abs/2210.03347)논문과 함께 발표했습니다. | ||||
| 1. **[PLBart](https://huggingface.co/docs/transformers/model_doc/plbart)** (UCLA NLP 에서) Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, Kai-Wei Chang 의 [Unified Pre-training for Program Understanding and Generation](https://arxiv.org/abs/2103.06333) 논문과 함께 발표했습니다. | ||||
| 1. **[PoolFormer](https://huggingface.co/docs/transformers/model_doc/poolformer)** (Sea AI Labs 에서) Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng 의 [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) 논문과 함께 발표했습니다. | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/main/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (Microsoft Research 에서) Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou 의 [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) 논문과 함께 발표했습니다. | ||||
| 1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (Nanjing University, The University of Hong Kong etc. 에서 제공)은 Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.의 [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf)논문과 함께 발표했습니다. | ||||
| 1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (NVIDIA 에서) Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius 의 [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) 논문과 함께 발표했습니다. | ||||
| @ -412,10 +412,10 @@ Flax, PyTorch, TensorFlow 설치 페이지에서 이들을 conda로 설치하는 | ||||
| 1. **[Vision Transformer (ViT)](https://huggingface.co/docs/transformers/model_doc/vit)** (Google AI 에서) Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby 의 [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) 논문과 함께 발표했습니다. | ||||
| 1. **[VisualBERT](https://huggingface.co/docs/transformers/model_doc/visual_bert)** (UCLA NLP 에서) Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang 의 [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://arxiv.org/pdf/1908.03557) 논문과 함께 발표했습니다. | ||||
| 1. **[ViT Hybrid](https://huggingface.co/docs/transformers/model_doc/vit_hybrid)** (Google AI 에서) Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby 의 [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) 논문과 함께 발표했습니다. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/main/model_doc/vitdet)** (Meta AI 에서 제공)은 Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He.의 [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527)논문과 함께 발표했습니다. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/model_doc/vitdet)** (Meta AI 에서 제공)은 Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He.의 [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527)논문과 함께 발표했습니다. | ||||
| 1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (Meta AI 에서) Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick 의 [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) 논문과 함께 발표했습니다. | ||||
| 1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (Meta AI 에서) Mahmoud Assran, Mathilde Caron, Ishan Misra, Piotr Bojanowski, Florian Bordes, Pascal Vincent, Armand Joulin, Michael Rabbat, Nicolas Ballas 의 [Masked Siamese Networks for Label-Efficient Learning](https://arxiv.org/abs/2204.07141) 논문과 함께 발표했습니다. | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/main/model_doc/vits)** (Kakao Enterprise 에서 제공)은 Jaehyeon Kim, Jungil Kong, Juhee Son 의 [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) 논문과 함께 발표했습니다. | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (Kakao Enterprise 에서 제공)은 Jaehyeon Kim, Jungil Kong, Juhee Son 의 [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) 논문과 함께 발표했습니다. | ||||
| 1. **[ViViT](https://huggingface.co/docs/transformers/model_doc/vivit)** (Google Research 에서) Anurag Arnab, Mostafa Dehghani, Georg Heigold, Chen Sun, Mario Lučić, Cordelia Schmid 의 [ViViT: A Video Vision Transformer](https://arxiv.org/abs/2103.15691) 논문과 함께 발표했습니다. | ||||
| 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (Facebook AI 에서) Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli 의 [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) 논문과 함께 발표했습니다. | ||||
| 1. **[Wav2Vec2-Conformer](https://huggingface.co/docs/transformers/model_doc/wav2vec2-conformer)** (Facebook AI 에서) Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Sravya Popuri, Dmytro Okhonko, Juan Pino 의 [FAIRSEQ S2T: Fast Speech-to-Text Modeling with FAIRSEQ](https://arxiv.org/abs/2010.05171) 논문과 함께 발표했습니다. | ||||
|  | ||||
| @ -385,7 +385,7 @@ conda install -c huggingface transformers | ||||
| 1. **[Pix2Struct](https://huggingface.co/docs/transformers/model_doc/pix2struct)** (来自 Google) 伴随论文 [Pix2Struct: Screenshot Parsing as Pretraining for Visual Language Understanding](https://arxiv.org/abs/2210.03347) 由 Kenton Lee, Mandar Joshi, Iulia Turc, Hexiang Hu, Fangyu Liu, Julian Eisenschlos, Urvashi Khandelwal, Peter Shaw, Ming-Wei Chang, Kristina Toutanova 发布。 | ||||
| 1. **[PLBart](https://huggingface.co/docs/transformers/model_doc/plbart)** (来自 UCLA NLP) 伴随论文 [Unified Pre-training for Program Understanding and Generation](https://arxiv.org/abs/2103.06333) 由 Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, Kai-Wei Chang 发布。 | ||||
| 1. **[PoolFormer](https://huggingface.co/docs/transformers/model_doc/poolformer)** (来自 Sea AI Labs) 伴随论文 [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) 由 Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng 发布。 | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/main/model_doc/pop2piano)** 伴随论文 [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) 由 Jongho Choi, Kyogu Lee 发布。 | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** 伴随论文 [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) 由 Jongho Choi, Kyogu Lee 发布。 | ||||
| 1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (来自 Microsoft Research) 伴随论文 [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) 由 Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou 发布。 | ||||
| 1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (来自 Nanjing University, The University of Hong Kong etc.) 伴随论文 [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) 由 Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao 发布。 | ||||
| 1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (来自 NVIDIA) 伴随论文 [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) 由 Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius 发布。 | ||||
| @ -436,10 +436,10 @@ conda install -c huggingface transformers | ||||
| 1. **[Vision Transformer (ViT)](https://huggingface.co/docs/transformers/model_doc/vit)** (来自 Google AI) 伴随论文 [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) 由 Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby 发布。 | ||||
| 1. **[VisualBERT](https://huggingface.co/docs/transformers/model_doc/visual_bert)** (来自 UCLA NLP) 伴随论文 [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://arxiv.org/pdf/1908.03557) 由 Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang 发布。 | ||||
| 1. **[ViT Hybrid](https://huggingface.co/docs/transformers/model_doc/vit_hybrid)** (来自 Google AI) 伴随论文 [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) 由 Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby 发布。 | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/main/model_doc/vitdet)** (来自 Meta AI) 伴随论文 [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) 由 Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He 发布。 | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/model_doc/vitdet)** (来自 Meta AI) 伴随论文 [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) 由 Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He 发布。 | ||||
| 1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (来自 Meta AI) 伴随论文 [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) 由 Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick 发布。 | ||||
| 1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (来自 Meta AI) 伴随论文 [Masked Siamese Networks for Label-Efficient Learning](https://arxiv.org/abs/2204.07141) 由 Mahmoud Assran, Mathilde Caron, Ishan Misra, Piotr Bojanowski, Florian Bordes, Pascal Vincent, Armand Joulin, Michael Rabbat, Nicolas Ballas 发布。 | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/main/model_doc/vits)** (来自 Kakao Enterprise) 伴随论文 [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) 由 Jaehyeon Kim, Jungil Kong, Juhee Son 发布。 | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (来自 Kakao Enterprise) 伴随论文 [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) 由 Jaehyeon Kim, Jungil Kong, Juhee Son 发布。 | ||||
| 1. **[ViViT](https://huggingface.co/docs/transformers/model_doc/vivit)** (来自 Google Research) 伴随论文 [ViViT: A Video Vision Transformer](https://arxiv.org/abs/2103.15691) 由 Anurag Arnab, Mostafa Dehghani, Georg Heigold, Chen Sun, Mario Lučić, Cordelia Schmid 发布。 | ||||
| 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (来自 Facebook AI) 伴随论文 [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) 由 Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli 发布。 | ||||
| 1. **[Wav2Vec2-Conformer](https://huggingface.co/docs/transformers/model_doc/wav2vec2-conformer)** (来自 Facebook AI) 伴随论文 [FAIRSEQ S2T: Fast Speech-to-Text Modeling with FAIRSEQ](https://arxiv.org/abs/2010.05171) 由 Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Sravya Popuri, Dmytro Okhonko, Juan Pino 发布。 | ||||
|  | ||||
| @ -397,7 +397,7 @@ conda install -c huggingface transformers | ||||
| 1. **[Pix2Struct](https://huggingface.co/docs/transformers/model_doc/pix2struct)** (from Google) released with the paper [Pix2Struct: Screenshot Parsing as Pretraining for Visual Language Understanding](https://arxiv.org/abs/2210.03347) by Kenton Lee, Mandar Joshi, Iulia Turc, Hexiang Hu, Fangyu Liu, Julian Eisenschlos, Urvashi Khandelwal, Peter Shaw, Ming-Wei Chang, Kristina Toutanova. | ||||
| 1. **[PLBart](https://huggingface.co/docs/transformers/model_doc/plbart)** (from UCLA NLP) released with the paper [Unified Pre-training for Program Understanding and Generation](https://arxiv.org/abs/2103.06333) by Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, Kai-Wei Chang. | ||||
| 1. **[PoolFormer](https://huggingface.co/docs/transformers/model_doc/poolformer)** (from Sea AI Labs) released with the paper [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) by Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng. | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/main/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.  | ||||
| 1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou. | ||||
| 1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao. | ||||
| 1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius. | ||||
| @ -448,10 +448,10 @@ conda install -c huggingface transformers | ||||
| 1. **[Vision Transformer (ViT)](https://huggingface.co/docs/transformers/model_doc/vit)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. | ||||
| 1. **[VisualBERT](https://huggingface.co/docs/transformers/model_doc/visual_bert)** (from UCLA NLP) released with the paper [VisualBERT: A Simple and Performant Baseline for Vision and Language](https://arxiv.org/pdf/1908.03557) by Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, Kai-Wei Chang. | ||||
| 1. **[ViT Hybrid](https://huggingface.co/docs/transformers/model_doc/vit_hybrid)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/main/model_doc/vitdet)** (from Meta AI) released with the paper [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) by Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. | ||||
| 1. **[VitDet](https://huggingface.co/docs/transformers/model_doc/vitdet)** (from Meta AI) released with the paper [Exploring Plain Vision Transformer Backbones for Object Detection](https://arxiv.org/abs/2203.16527) by Yanghao Li, Hanzi Mao, Ross Girshick, Kaiming He. | ||||
| 1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (from Meta AI) released with the paper [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) by Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick. | ||||
| 1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (from Meta AI) released with the paper [Masked Siamese Networks for Label-Efficient Learning](https://arxiv.org/abs/2204.07141) by Mahmoud Assran, Mathilde Caron, Ishan Misra, Piotr Bojanowski, Florian Bordes, Pascal Vincent, Armand Joulin, Michael Rabbat, Nicolas Ballas. | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/main/model_doc/vits)** (from Kakao Enterprise) released with the paper [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) by Jaehyeon Kim, Jungil Kong, Juhee Son. | ||||
| 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (from Kakao Enterprise) released with the paper [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) by Jaehyeon Kim, Jungil Kong, Juhee Son. | ||||
| 1. **[ViViT](https://huggingface.co/docs/transformers/model_doc/vivit)** (from Google Research) released with the paper [ViViT: A Video Vision Transformer](https://arxiv.org/abs/2103.15691) by Anurag Arnab, Mostafa Dehghani, Georg Heigold, Chen Sun, Mario Lučić, Cordelia Schmid. | ||||
| 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (from Facebook AI) released with the paper [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli. | ||||
| 1. **[Wav2Vec2-Conformer](https://huggingface.co/docs/transformers/model_doc/wav2vec2-conformer)** (from Facebook AI) released with the paper [FAIRSEQ S2T: Fast Speech-to-Text Modeling with FAIRSEQ](https://arxiv.org/abs/2010.05171) by Changhan Wang, Yun Tang, Xutai Ma, Anne Wu, Sravya Popuri, Dmytro Okhonko, Juan Pino. | ||||
|  | ||||
| @ -25,7 +25,7 @@ If you are not aware of what tools and agents are in the context of transformers | ||||
|  | ||||
| <Tip warning={true}> | ||||
|  | ||||
| Transformers Agent is an experimental API that is subject to change at any time. Results returned by the agents | ||||
| Transformers Agents is an experimental API that is subject to change at any time. Results returned by the agents | ||||
| can vary as the APIs or underlying models are prone to change. | ||||
|  | ||||
| </Tip> | ||||
|  | ||||
| @ -18,7 +18,7 @@ rendered properly in your Markdown viewer. | ||||
|  | ||||
| <Tip warning={true}> | ||||
|  | ||||
| Transformers Agent is an experimental API which is subject to change at any time. Results returned by the agents | ||||
| Transformers Agents is an experimental API which is subject to change at any time. Results returned by the agents | ||||
| can vary as the APIs or underlying models are prone to change. | ||||
|  | ||||
| </Tip> | ||||
|  | ||||
| @ -14,11 +14,11 @@ rendered properly in your Markdown viewer. | ||||
|  | ||||
| --> | ||||
|  | ||||
| # Transformers Agent | ||||
| # Transformers Agents | ||||
|  | ||||
| <Tip warning={true}> | ||||
|  | ||||
| Transformers Agent is an experimental API which is subject to change at any time. Results returned by the agents | ||||
| Transformers Agents is an experimental API which is subject to change at any time. Results returned by the agents | ||||
| can vary as the APIs or underlying models are prone to change. | ||||
|  | ||||
| </Tip> | ||||
|  | ||||
| @ -20,7 +20,7 @@ Transformers와 관련하여 어떤 도구와 에이전트가 있는지 잘 모 | ||||
|  | ||||
| <Tip warning={true}> | ||||
|  | ||||
| Transformers Agent는 실험 중인 API로 언제든지 변경될 수 있습니다.  | ||||
| Transformers Agents는 실험 중인 API로 언제든지 변경될 수 있습니다.  | ||||
| API 또는 기반 모델이 변경되기 쉽기 때문에 에이전트가 반환하는 결과도 달라질 수 있습니다. | ||||
|  | ||||
| </Tip> | ||||
|  | ||||
| @ -62,7 +62,7 @@ from transformers.utils import check_min_version, send_example_telemetry | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| Array = Any | ||||
| Dataset = datasets.arrow_dataset.Dataset | ||||
|  | ||||
| @ -55,7 +55,7 @@ from transformers.utils import check_min_version, send_example_telemetry | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| Array = Any | ||||
| Dataset = datasets.arrow_dataset.Dataset | ||||
|  | ||||
| @ -56,7 +56,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -45,7 +45,7 @@ from transformers.utils.versions import require_version | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -55,7 +55,7 @@ from transformers.utils.versions import require_version | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -56,7 +56,7 @@ from transformers.utils.versions import require_version | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -47,7 +47,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
|  | ||||
|  | ||||
| @ -44,7 +44,7 @@ from transformers.utils.versions import require_version | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -49,7 +49,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used. | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -54,7 +54,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used. | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -56,7 +56,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -57,7 +57,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
|  | ||||
|  | ||||
| @ -54,7 +54,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -57,7 +57,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") | ||||
|  | ||||
| @ -48,7 +48,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -48,7 +48,7 @@ from transformers.utils import PaddingStrategy, check_min_version, send_example_ | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| @ -56,7 +56,7 @@ from transformers.utils import PaddingStrategy, check_min_version, send_example_ | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
| # You should update this to your particular problem to have better documentation of `model_type` | ||||
|  | ||||
| @ -50,7 +50,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -49,7 +49,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -56,7 +56,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -57,7 +57,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -47,7 +47,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -52,7 +52,7 @@ from transformers.utils.versions import require_version | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -50,7 +50,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
|  | ||||
|  | ||||
| @ -51,7 +51,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -53,7 +53,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -49,7 +49,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -53,7 +53,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -56,7 +56,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt") | ||||
|  | ||||
| @ -48,7 +48,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -49,7 +49,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -48,7 +48,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
|  | ||||
|  | ||||
| @ -49,7 +49,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -50,7 +50,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -56,7 +56,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risk. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt") | ||||
|  | ||||
| @ -53,7 +53,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -57,7 +57,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = get_logger(__name__) | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt") | ||||
|  | ||||
| @ -52,7 +52,7 @@ from transformers.utils.versions import require_version | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version( | ||||
|     "datasets>=1.8.0", "To fix: pip install -r examples/tensorflow/contrastive-image-text/requirements.txt" | ||||
|  | ||||
| @ -55,7 +55,7 @@ from transformers.utils.versions import require_version | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -51,7 +51,7 @@ from transformers.utils import PaddingStrategy, check_min_version, send_example_ | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| @ -49,7 +49,7 @@ from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME, check_min_version, | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| @ -54,7 +54,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
| # region Checking dependencies | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt") | ||||
|  | ||||
|  | ||||
| @ -48,7 +48,7 @@ from transformers.utils import check_min_version, send_example_telemetry | ||||
|  | ||||
|  | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| task_to_keys = { | ||||
|     "cola": ("sentence", None), | ||||
|  | ||||
| @ -57,7 +57,7 @@ from transformers.utils.versions import require_version | ||||
|  | ||||
| # region Dependencies and constants | ||||
| # Will error if the minimal version of Transformers is not installed. Remove at your own risks. | ||||
| check_min_version("4.33.0.dev0") | ||||
| check_min_version("4.33.0") | ||||
|  | ||||
| require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt") | ||||
|  | ||||
|  | ||||
							
								
								
									
										2
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								setup.py
									
									
									
									
									
								
							| @ -425,7 +425,7 @@ install_requires = [ | ||||
|  | ||||
| setup( | ||||
|     name="transformers", | ||||
|     version="4.33.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) | ||||
|     version="4.33.3",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) | ||||
|     author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)", | ||||
|     author_email="transformers@huggingface.co", | ||||
|     description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow", | ||||
|  | ||||
| @ -18,7 +18,7 @@ | ||||
| # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names | ||||
| # in the namespace without actually importing anything (and especially none of the backends). | ||||
|  | ||||
| __version__ = "4.33.0.dev0" | ||||
| __version__ = "4.33.3" | ||||
|  | ||||
| from typing import TYPE_CHECKING | ||||
|  | ||||
|  | ||||
| @ -26,6 +26,8 @@ from ..utils import is_accelerate_available, is_torch_available, logging | ||||
| if is_torch_available(): | ||||
|     import torch | ||||
|  | ||||
|     from ..optimization import get_scheduler | ||||
|  | ||||
| logger = logging.get_logger(__name__) | ||||
|  | ||||
|  | ||||
| @ -274,7 +276,7 @@ def deepspeed_optim_sched(trainer, hf_deepspeed_config, args, num_training_steps | ||||
|     # 1. DS scheduler + DS optimizer: Yes | ||||
|     # 2. HF scheduler + HF optimizer: Mostly* | ||||
|     # 3. DS scheduler + HF optimizer: Mostly* | ||||
|     # 4. HF scheduler + DS optimizer: No | ||||
|     # 4. HF scheduler + DS optimizer: Yes | ||||
|     # | ||||
|     # Mostly*: All non-native DeepSpeed optimizers that have both CPU and GPU implementation should work (except LAMB) | ||||
|  | ||||
| @ -304,11 +306,18 @@ def deepspeed_optim_sched(trainer, hf_deepspeed_config, args, num_training_steps | ||||
|         lr_scheduler = DummyScheduler(optimizer) | ||||
|     else: | ||||
|         if isinstance(optimizer, DummyOptim): | ||||
|             raise ValueError( | ||||
|                 "Found `optimizer` configured in the DeepSpeed config, but no `scheduler`. " | ||||
|                 "Please configure a scheduler in the DeepSpeed config." | ||||
|             ) | ||||
|         lr_scheduler = trainer.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer) | ||||
|  | ||||
|             def _lr_scheduler_callable(optimizer): | ||||
|                 return get_scheduler( | ||||
|                     trainer.args.lr_scheduler_type, | ||||
|                     optimizer=optimizer, | ||||
|                     num_warmup_steps=trainer.args.get_warmup_steps(num_training_steps), | ||||
|                     num_training_steps=num_training_steps, | ||||
|                 ) | ||||
|  | ||||
|             lr_scheduler = DummyScheduler(optimizer, lr_scheduler_callable=_lr_scheduler_callable) | ||||
|         else: | ||||
|             lr_scheduler = trainer.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer) | ||||
|  | ||||
|     return optimizer, lr_scheduler | ||||
|  | ||||
|  | ||||
| @ -1420,7 +1420,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix | ||||
|                 vectors at the end. Reducing the size will remove vectors from the end. If not provided or `None`, just | ||||
|                 returns a pointer to the input tokens `torch.nn.Embedding` module of the model without doing anything. | ||||
|             pad_to_multiple_of (`int`, *optional*): | ||||
|                 If set will pad the embedding matrix to a multiple of the provided value. | ||||
|                 If set will pad the embedding matrix to a multiple of the provided value. If `new_num_tokens` is set to | ||||
|                 `None` will just pad the embedding to a multiple of `pad_to_multiple_of`. | ||||
|  | ||||
|                 This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability | ||||
|                 `>= 7.5` (Volta), or on TPUs which benefit from having sequence lengths be a multiple of 128. For more | ||||
| @ -1431,12 +1432,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix | ||||
|             `torch.nn.Embedding`: Pointer to the input tokens Embeddings Module of the model. | ||||
|         """ | ||||
|         model_embeds = self._resize_token_embeddings(new_num_tokens, pad_to_multiple_of) | ||||
|         if new_num_tokens is None: | ||||
|         if new_num_tokens is None and pad_to_multiple_of is None: | ||||
|             return model_embeds | ||||
|  | ||||
|         # Update base model and current model config | ||||
|         self.config.vocab_size = new_num_tokens | ||||
|         self.vocab_size = new_num_tokens | ||||
|         self.config.vocab_size = model_embeds.weight.shape[0] | ||||
|         self.vocab_size = model_embeds.weight.shape[0] | ||||
|  | ||||
|         # Tie weights again if needed | ||||
|         self.tie_weights() | ||||
| @ -1451,10 +1452,20 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix | ||||
|             add_hook_to_module(new_embeddings, hook) | ||||
|         self.set_input_embeddings(new_embeddings) | ||||
|  | ||||
|         # Update new_num_tokens with the actual size of new_embeddings | ||||
|         if pad_to_multiple_of is not None: | ||||
|             if is_deepspeed_zero3_enabled(): | ||||
|                 import deepspeed | ||||
|  | ||||
|                 with deepspeed.zero.GatheredParameters(new_embeddings.weight, modifier_rank=None): | ||||
|                     new_num_tokens = new_embeddings.weight.shape[0] | ||||
|             else: | ||||
|                 new_num_tokens = new_embeddings.weight.shape[0] | ||||
|  | ||||
|         # if word embeddings are not tied, make sure that lm head is resized as well | ||||
|         if self.get_output_embeddings() is not None and not self.config.tie_word_embeddings: | ||||
|             old_lm_head = self.get_output_embeddings() | ||||
|             new_lm_head = self._get_resized_lm_head(old_lm_head, new_embeddings.weight.shape[0]) | ||||
|             new_lm_head = self._get_resized_lm_head(old_lm_head, new_num_tokens) | ||||
|             if hasattr(old_lm_head, "_hf_hook"): | ||||
|                 hook = old_lm_head._hf_hook | ||||
|                 add_hook_to_module(new_lm_head, hook) | ||||
| @ -1482,7 +1493,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix | ||||
|                 vectors from the end. If not provided or `None`, just returns a pointer to the input tokens | ||||
|                 `torch.nn.Embedding` module of the model without doing anything. | ||||
|             pad_to_multiple_of (`int`, *optional*): | ||||
|                 If set will pad the embedding matrix to a multiple of the provided value. | ||||
|                 If set will pad the embedding matrix to a multiple of the provided value. If `new_num_tokens` is set to | ||||
|                 `None` will just pad the embedding to a multiple of `pad_to_multiple_of`. | ||||
|  | ||||
|                 This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability | ||||
|                 `>= 7.5` (Volta), or on TPUs which benefit from having sequence lengths be a multiple of 128. For more | ||||
| @ -1522,7 +1534,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix | ||||
|         else: | ||||
|             old_num_tokens, old_embedding_dim = old_embeddings.weight.size() | ||||
|  | ||||
|         if old_num_tokens == new_num_tokens: | ||||
|         if old_num_tokens == new_num_tokens and not is_deepspeed_zero3_enabled(): | ||||
|             return old_embeddings | ||||
|  | ||||
|         if not isinstance(old_embeddings, nn.Embedding): | ||||
| @ -1532,40 +1544,34 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix | ||||
|                 f" {nn.Embedding}." | ||||
|             ) | ||||
|  | ||||
|         # Build new embeddings | ||||
|  | ||||
|         # When using DeepSpeed ZeRO-3, we shouldn't create new embeddings with DeepSpeed init | ||||
|         # because the shape of the new embedding layer is used across various modeling files | ||||
|         # as well as to update config vocab size. Shape will be 0 when using DeepSpeed init leading | ||||
|         # to errors when training. | ||||
|         new_embeddings = nn.Embedding( | ||||
|             new_num_tokens, | ||||
|             old_embedding_dim, | ||||
|             device=old_embeddings.weight.device, | ||||
|             dtype=old_embeddings.weight.dtype, | ||||
|         ) | ||||
|  | ||||
|         # initialize all new embeddings (in particular added tokens) | ||||
|         self._init_weights(new_embeddings) | ||||
|  | ||||
|         # Copy token embeddings from the previous weights | ||||
|  | ||||
|         # numbers of tokens to copy | ||||
|         n = min(old_num_tokens, new_num_tokens) | ||||
|  | ||||
|         if is_deepspeed_zero3_enabled(): | ||||
|             import deepspeed | ||||
|  | ||||
|             with deepspeed.zero.Init(config_dict_or_path=deepspeed_config()): | ||||
|                 # Build new embeddings | ||||
|                 new_embeddings = nn.Embedding( | ||||
|                     new_num_tokens, | ||||
|                     old_embedding_dim, | ||||
|                     device=old_embeddings.weight.device, | ||||
|                     dtype=old_embeddings.weight.dtype, | ||||
|                 ) | ||||
|  | ||||
|             params = [old_embeddings.weight, new_embeddings.weight] | ||||
|             with deepspeed.zero.GatheredParameters(params, modifier_rank=0): | ||||
|                 # initialize all new embeddings (in particular added tokens) | ||||
|                 self._init_weights(new_embeddings) | ||||
|  | ||||
|                 # Copy token embeddings from the previous weights | ||||
|                 new_embeddings.weight.data[:n, :] = old_embeddings.weight.data[:n, :] | ||||
|         else: | ||||
|             # Build new embeddings | ||||
|             new_embeddings = nn.Embedding( | ||||
|                 new_num_tokens, | ||||
|                 old_embedding_dim, | ||||
|                 device=old_embeddings.weight.device, | ||||
|                 dtype=old_embeddings.weight.dtype, | ||||
|             ) | ||||
|  | ||||
|             # initialize all new embeddings (in particular added tokens) | ||||
|             self._init_weights(new_embeddings) | ||||
|  | ||||
|             # Copy token embeddings from the previous weights | ||||
|             new_embeddings.weight.data[:n, :] = old_embeddings.weight.data[:n, :] | ||||
|  | ||||
|         return new_embeddings | ||||
| @ -1608,7 +1614,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix | ||||
|                 old_lm_head.weight.size() if not transposed else old_lm_head.weight.t().size() | ||||
|             ) | ||||
|  | ||||
|         if old_num_tokens == new_num_tokens: | ||||
|         if old_num_tokens == new_num_tokens and not is_deepspeed_zero3_enabled(): | ||||
|             return old_lm_head | ||||
|  | ||||
|         if not isinstance(old_lm_head, nn.Linear): | ||||
| @ -1622,51 +1628,50 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix | ||||
|         new_lm_head_shape = (old_lm_head_dim, new_num_tokens) if not transposed else (new_num_tokens, old_lm_head_dim) | ||||
|         has_new_lm_head_bias = old_lm_head.bias is not None | ||||
|  | ||||
|         # When using DeepSpeed ZeRO-3, we shouldn't create new embeddings with DeepSpeed init | ||||
|         # because the shape of the new embedding layer is used across various modeling files | ||||
|         # as well as to update config vocab size. Shape will be 0 when using DeepSpeed init leading | ||||
|         # to errors when training. | ||||
|         new_lm_head = nn.Linear( | ||||
|             *new_lm_head_shape, | ||||
|             bias=has_new_lm_head_bias, | ||||
|             device=old_lm_head.weight.device, | ||||
|             dtype=old_lm_head.weight.dtype, | ||||
|         ) | ||||
|  | ||||
|         # initialize new lm head (in particular added tokens) | ||||
|         self._init_weights(new_lm_head) | ||||
|  | ||||
|         num_tokens_to_copy = min(old_num_tokens, new_num_tokens) | ||||
|  | ||||
|         # XXX: put the long block of code in a wrapper | ||||
|         if is_deepspeed_zero3_enabled(): | ||||
|             import deepspeed | ||||
|  | ||||
|             with deepspeed.zero.Init(config_dict_or_path=deepspeed_config()): | ||||
|                 new_lm_head = nn.Linear( | ||||
|                     *new_lm_head_shape, | ||||
|                     bias=has_new_lm_head_bias, | ||||
|                     device=old_lm_head.weight.device, | ||||
|                     dtype=old_lm_head.weight.dtype, | ||||
|                 ) | ||||
|             params = [old_lm_head.weight, old_lm_head.bias, new_lm_head.weight, new_lm_head.bias] | ||||
|             with deepspeed.zero.GatheredParameters(params, modifier_rank=0): | ||||
|                 self._init_weights(new_lm_head) | ||||
|                 # Copy old lm head weights to new lm head | ||||
|                 if not transposed: | ||||
|                     new_lm_head.weight.data[:num_tokens_to_copy, :] = old_lm_head.weight.data[:num_tokens_to_copy, :] | ||||
|                 else: | ||||
|                     new_lm_head.weight.data[:, :num_tokens_to_copy] = old_lm_head.weight.data[:, :num_tokens_to_copy] | ||||
|  | ||||
|                 # Copy bias weights to new lm head | ||||
|                 if has_new_lm_head_bias: | ||||
|                     new_lm_head.bias.data[:num_tokens_to_copy] = old_lm_head.bias.data[:num_tokens_to_copy] | ||||
|                 self._copy_lm_head_original_to_resized( | ||||
|                     new_lm_head, old_lm_head, num_tokens_to_copy, transposed, has_new_lm_head_bias | ||||
|                 ) | ||||
|         else: | ||||
|             new_lm_head = nn.Linear( | ||||
|                 *new_lm_head_shape, | ||||
|                 bias=has_new_lm_head_bias, | ||||
|                 device=old_lm_head.weight.device, | ||||
|                 dtype=old_lm_head.weight.dtype, | ||||
|             self._copy_lm_head_original_to_resized( | ||||
|                 new_lm_head, old_lm_head, num_tokens_to_copy, transposed, has_new_lm_head_bias | ||||
|             ) | ||||
|             self._init_weights(new_lm_head) | ||||
|             # Copy old lm head weights to new lm head | ||||
|             if not transposed: | ||||
|                 new_lm_head.weight.data[:num_tokens_to_copy, :] = old_lm_head.weight.data[:num_tokens_to_copy, :] | ||||
|             else: | ||||
|                 new_lm_head.weight.data[:, :num_tokens_to_copy] = old_lm_head.weight.data[:, :num_tokens_to_copy] | ||||
|  | ||||
|             # Copy bias weights to new lm head | ||||
|             if has_new_lm_head_bias: | ||||
|                 new_lm_head.bias.data[:num_tokens_to_copy] = old_lm_head.bias.data[:num_tokens_to_copy] | ||||
|  | ||||
|         return new_lm_head | ||||
|  | ||||
|     def _copy_lm_head_original_to_resized( | ||||
|         self, new_lm_head, old_lm_head, num_tokens_to_copy, transposed, has_new_lm_head_bias | ||||
|     ): | ||||
|         # Copy old lm head weights to new lm head | ||||
|         if not transposed: | ||||
|             new_lm_head.weight.data[:num_tokens_to_copy, :] = old_lm_head.weight.data[:num_tokens_to_copy, :] | ||||
|         else: | ||||
|             new_lm_head.weight.data[:, :num_tokens_to_copy] = old_lm_head.weight.data[:, :num_tokens_to_copy] | ||||
|  | ||||
|         # Copy bias weights to new lm head | ||||
|         if has_new_lm_head_bias: | ||||
|             new_lm_head.bias.data[:num_tokens_to_copy] = old_lm_head.bias.data[:num_tokens_to_copy] | ||||
|  | ||||
|     def resize_position_embeddings(self, new_num_position_embeddings: int): | ||||
|         raise NotImplementedError( | ||||
|             f"`resize_position_embeddings` is not implemented for {self.__class__}`. To implement it, you should " | ||||
|  | ||||
| @ -1016,13 +1016,11 @@ class AutoConfig: | ||||
|         kwargs["name_or_path"] = pretrained_model_name_or_path | ||||
|         trust_remote_code = kwargs.pop("trust_remote_code", None) | ||||
|         code_revision = kwargs.pop("code_revision", None) | ||||
|  | ||||
|         revision = kwargs.pop("revision", None) | ||||
|         kwargs["revision"] = sanitize_code_revision(pretrained_model_name_or_path, revision, trust_remote_code) | ||||
|  | ||||
|         revision = sanitize_code_revision(pretrained_model_name_or_path, revision, trust_remote_code) | ||||
|  | ||||
|         config_dict, unused_kwargs = PretrainedConfig.get_config_dict( | ||||
|             pretrained_model_name_or_path, revision=revision, **kwargs | ||||
|         ) | ||||
|         config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) | ||||
|         has_remote_code = "auto_map" in config_dict and "AutoConfig" in config_dict["auto_map"] | ||||
|         has_local_code = "model_type" in config_dict and config_dict["model_type"] in CONFIG_MAPPING | ||||
|         trust_remote_code = resolve_trust_remote_code( | ||||
|  | ||||
| @ -1086,21 +1086,57 @@ class BarkFineModel(BarkPreTrainedModel): | ||||
|             ] | ||||
|         ) | ||||
|         self.set_input_embeddings(new_embeddings_list) | ||||
|         new_num_tokens = [embed.weight.shape[0] for embed in new_embeddings_list] | ||||
|         new_num_tokens = new_embeddings_list[0].weight.shape[0] | ||||
|  | ||||
|         # if word embeddings are not tied, make sure that lm head is resized as well | ||||
|         if self.get_output_embeddings() is not None and not self.config.tie_word_embeddings: | ||||
|             old_lm_head_list = self.get_output_embeddings() | ||||
|             new_lm_head_list = nn.ModuleList( | ||||
|                 [ | ||||
|                     self._get_resized_lm_head(old_lm_head, new_num_token) | ||||
|                     for old_lm_head, new_num_token in zip(old_lm_head_list, new_num_tokens) | ||||
|                 ] | ||||
|                 [self._get_resized_lm_head(old_lm_head, new_num_tokens) for old_lm_head in old_lm_head_list] | ||||
|             ) | ||||
|             self.set_output_embeddings(new_lm_head_list) | ||||
|  | ||||
|         return self.get_input_embeddings() | ||||
|  | ||||
|     def resize_token_embeddings( | ||||
|         self, new_num_tokens: Optional[int] = None, pad_to_multiple_of: Optional[int] = None | ||||
|     ) -> nn.Embedding: | ||||
|         """ | ||||
|         Resizes input token embeddings matrix of the model if `new_num_tokens != config.vocab_size`. | ||||
|  | ||||
|         Takes care of tying weights embeddings afterwards if the model class has a `tie_weights()` method. | ||||
|  | ||||
|         Arguments: | ||||
|             new_num_tokens (`int`, *optional*): | ||||
|                 The number of new tokens in the embedding matrix. Increasing the size will add newly initialized | ||||
|                 vectors at the end. Reducing the size will remove vectors from the end. If not provided or `None`, just | ||||
|                 returns a pointer to the input tokens `torch.nn.Embedding` module of the model without doing anything. | ||||
|             pad_to_multiple_of (`int`, *optional*): | ||||
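|                 If set will pad the embedding matrix to a multiple of the provided value. If `new_num_tokens` is set to | ||||
|                 `None` will just pad the embedding to a multiple of `pad_to_multiple_of`. | ||||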
|  | ||||
|                 This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability | ||||
|                 `>= 7.5` (Volta), or on TPUs which benefit from having sequence lengths be a multiple of 128. For more | ||||
|                 details about this, or help on choosing the correct value for resizing, refer to this guide: | ||||
|                 https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc | ||||
|  | ||||
|         Return: | ||||
|             `torch.nn.Embedding`: Pointer to the input tokens Embeddings Module of the model. | ||||
|         """ | ||||
|         model_embeds = self._resize_token_embeddings(new_num_tokens, pad_to_multiple_of) | ||||
|         if new_num_tokens is None and pad_to_multiple_of is None: | ||||
|             return model_embeds | ||||
|  | ||||
|         # Update base model and current model config | ||||
|         self.config.output_vocab_size = model_embeds[0].weight.shape[0] | ||||
|         self.config.vocab_size = model_embeds[0].weight.shape[0] | ||||
|         self.output_vocab_size = model_embeds[0].weight.shape[0] | ||||
|         self.vocab_size = model_embeds[0].weight.shape[0] | ||||
|  | ||||
|         # Tie weights again if needed | ||||
|         self.tie_weights() | ||||
|  | ||||
|         return model_embeds | ||||
|  | ||||
|     def tie_weights(self): | ||||
|         """ | ||||
|         Tie the weights between the input embeddings list and the output embeddings list. | ||||
|  | ||||
| @ -60,7 +60,7 @@ from .data.data_collator import DataCollator, DataCollatorWithPadding, default_d | ||||
| from .debug_utils import DebugOption, DebugUnderflowOverflow | ||||
| from .dependency_versions_check import dep_version_check | ||||
| from .hyperparameter_search import ALL_HYPERPARAMETER_SEARCH_BACKENDS, default_hp_search_backend | ||||
| from .integrations.deepspeed import deepspeed_init, deepspeed_load_checkpoint | ||||
| from .integrations.deepspeed import deepspeed_init, deepspeed_load_checkpoint, is_deepspeed_available | ||||
| from .modelcard import TrainingSummary | ||||
| from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model | ||||
| from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_MAPPING_NAMES | ||||
| @ -212,6 +212,9 @@ if is_accelerate_available(): | ||||
|             save_fsdp_optimizer, | ||||
|         ) | ||||
|  | ||||
|     if is_deepspeed_available(): | ||||
|         from accelerate.utils import DeepSpeedSchedulerWrapper | ||||
|  | ||||
|  | ||||
| if TYPE_CHECKING: | ||||
|     import optuna | ||||
| @ -2362,7 +2365,14 @@ class Trainer: | ||||
|             torch.save(self.optimizer.state_dict(), os.path.join(output_dir, OPTIMIZER_NAME)) | ||||
|  | ||||
|         # Save SCHEDULER & SCALER | ||||
|         if self.args.should_save and not self.is_deepspeed_enabled and not is_torch_tpu_available(): | ||||
|         is_deepspeed_custom_scheduler = self.is_deepspeed_enabled and not isinstance( | ||||
|             self.lr_scheduler, DeepSpeedSchedulerWrapper | ||||
|         ) | ||||
|         if ( | ||||
|             self.args.should_save | ||||
|             and (not self.is_deepspeed_enabled or is_deepspeed_custom_scheduler) | ||||
|             and not is_torch_tpu_available() | ||||
|         ): | ||||
|             with warnings.catch_warnings(record=True) as caught_warnings: | ||||
|                 torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, SCHEDULER_NAME)) | ||||
|             reissue_pt_warnings(caught_warnings) | ||||
| @ -2428,6 +2438,10 @@ class Trainer: | ||||
|  | ||||
|         if self.is_deepspeed_enabled: | ||||
|             # deepspeed loads optimizer/lr_scheduler together with the model in deepspeed_init | ||||
|             if not isinstance(self.lr_scheduler, DeepSpeedSchedulerWrapper): | ||||
|                 with warnings.catch_warnings(record=True) as caught_warnings: | ||||
|                     self.lr_scheduler.load_state_dict(torch.load(os.path.join(checkpoint, SCHEDULER_NAME))) | ||||
|                 reissue_pt_warnings(caught_warnings) | ||||
|             return | ||||
|  | ||||
|         checkpoint_file_exists = ( | ||||
|  | ||||
| @ -136,6 +136,14 @@ ZERO3 = "zero3" | ||||
| FP16 = "fp16" | ||||
| BF16 = "bf16" | ||||
|  | ||||
| HF_OPTIM = "hf_optim" | ||||
| HF_SCHEDULER = "hf_scheduler" | ||||
| DS_OPTIM = "ds_optim" | ||||
| DS_SCHEDULER = "ds_scheduler" | ||||
|  | ||||
| optims = [HF_OPTIM, DS_OPTIM] | ||||
| schedulers = [HF_SCHEDULER, DS_SCHEDULER] | ||||
|  | ||||
| stages = [ZERO2, ZERO3] | ||||
| if is_torch_bf16_gpu_available(): | ||||
|     dtypes = [FP16, BF16] | ||||
| @ -153,6 +161,8 @@ def parameterized_custom_name_func(func, param_num, param): | ||||
| # Cartesian-product of zero stages with models to test | ||||
| params = list(itertools.product(stages, dtypes)) | ||||
|  | ||||
| params_with_optims_and_schedulers = list(itertools.product(stages, dtypes, optims, schedulers)) | ||||
|  | ||||
|  | ||||
| @require_deepspeed | ||||
| @require_torch_gpu | ||||
| @ -640,10 +650,16 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T | ||||
|                 "Can't find a valid checkpoint at" in str(context.exception), f"got exception: {context.exception}" | ||||
|             ) | ||||
|  | ||||
|     @parameterized.expand(params, name_func=parameterized_custom_name_func) | ||||
|     def test_can_resume_training_normal(self, stage, dtype): | ||||
|     @parameterized.expand(params_with_optims_and_schedulers, name_func=parameterized_custom_name_func) | ||||
|     def test_can_resume_training_normal(self, stage, dtype, optim, scheduler): | ||||
|         # adapted from TrainerIntegrationTest.test_can_resume_training | ||||
|         # test normal resume for each stage separately, error-handling is tested in a different test | ||||
|  | ||||
|         # TODO: Currently, hf_optim + hf_scheduler resumes with the correct states and | ||||
|         # also has the same losses for a few steps, but then slowly diverges. Need to figure it out. | ||||
|         if optim == HF_OPTIM and scheduler == HF_SCHEDULER: | ||||
|             return | ||||
|  | ||||
|         output_dir = self.get_auto_remove_tmp_dir("./xxx", after=False) | ||||
|         ds_config_dict = self.get_config_dict(stage) | ||||
|         if dtype == FP16: | ||||
| @ -652,6 +668,12 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T | ||||
|         if stage == ZERO3: | ||||
|             ds_config_dict["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = True | ||||
|  | ||||
|         if optim == HF_OPTIM: | ||||
|             del ds_config_dict["optimizer"] | ||||
|  | ||||
|         if scheduler == HF_SCHEDULER: | ||||
|             del ds_config_dict["scheduler"] | ||||
|  | ||||
|         kwargs = { | ||||
|             "output_dir": output_dir, | ||||
|             "train_len": 128, | ||||
|  | ||||
| @ -1424,6 +1424,9 @@ class ModelTesterMixin: | ||||
|             model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64) | ||||
|             self.assertTrue(model_embed.weight.shape[0] // 64, 0) | ||||
|  | ||||
|             self.assertTrue(model_embed.weight.shape[0], model.config.vocab_size) | ||||
|             self.assertTrue(model.config.vocab_size, model.vocab_size) | ||||
|  | ||||
|             model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64) | ||||
|             self.assertTrue(model_embed.weight.shape[0] // 64, 0) | ||||
|  | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	