\begin{thebibliography}{10} \bibitem{Arl:2007:phd} Sylvain Arlot. \newblock {\em Resampling and Model Selection}. \newblock PhD thesis, University Paris-Sud 11, December 2007. \newblock oai:tel.archives-ouvertes.fr:tel-00198803\_v1. \bibitem{Arl:2008a} Sylvain Arlot. \newblock {$V$}-fold cross-validation improved: {$V$}-fold penalization, February 2008. \newblock arXiv:0802.0566v2. \bibitem{Arl:2009:RP} Sylvain Arlot. \newblock Model selection by resampling penalization. \newblock {\em Electron. J. Stat.}, 3:557--624 (electronic), 2009. \bibitem{Arl_Mas:2009:pente} Sylvain Arlot and Pascal Massart. \newblock Data-driven calibration of penalties for least-squares regression. \newblock {\em J. Mach. Learn. Res.}, 10:245--279 (electronic), 2009. \bibitem{Aud:2004:PMA908} Jean-Yves Audibert. \newblock Classification under polynomial entropy and margin assumptions and randomized estimators. \newblock Preprint, Laboratoire de Probabilit{\'e}s et Mod{\`e}les Al{\'e}atoires, 2004. \bibitem{Aud_Tsy:2007} Jean-Yves Audibert and Alexandre~B. Tsybakov. \newblock Fast learning rates for plug-in classifiers. \newblock {\em Ann. Statist.}, 35(2):608--633, 2007. \bibitem{Bar_Bir_Mas:1999} Andrew Barron, Lucien Birg{\'e}, and Pascal Massart. \newblock Risk bounds for model selection via penalization. \newblock {\em Probab. Theory Related Fields}, 113(3):301--413, 1999. \bibitem{Bar_Bou_Men:2005} Peter~L. Bartlett, Olivier Bousquet, and Shahar Mendelson. \newblock Local {R}ademacher complexities. \newblock {\em Annals of Statistics}, 33(4):1497--1537, 2005. \bibitem{Bar_Jor_McA:2006} Peter~L.\ Bartlett, Michael~I.\ Jordan, and Jon~D.\ McAuliffe. \newblock Convexity, classification, and risk bounds. \newblock {\em Journal of the American Statistical Association}, 101(473):138--156, 2006. \bibitem{Bar_Men_Phi:2004} Peter~L. Bartlett, Shahar Mendelson, and Petra Philips. \newblock Local complexities for empirical risk minimization. 
\newblock In {\em Learning theory}, volume 3120 of {\em Lecture Notes in Comput. Sci.}, pages 270--284. Springer, Berlin, 2004. \bibitem{Bir_Mas:1998} Lucien Birg{\'e} and Pascal Massart. \newblock Minimum contrast estimators on sieves: exponential bounds and rates of convergence. \newblock {\em Bernoulli}, 4(3):329--375, 1998. \bibitem{Bla_Lug_Vay:2003} Gilles Blanchard, G{\'a}bor Lugosi, and Nicolas Vayatis. \newblock On the rate of convergence of regularized boosting classifiers. \newblock {\em J. Mach. Learn. Res.}, 4(5):861--894, 2004. \bibitem{Bla_Mas:2006} Gilles Blanchard and Pascal Massart. \newblock Discussion: ``{L}ocal {R}ademacher complexities and oracle inequalities in risk minimization'' [{A}nn. {S}tatist. {\bf 34} (2006), no. 6, 2593--2656] by {V}. {K}oltchinskii. \newblock {\em Ann. Statist.}, 34(6):2664--2671, 2006. \bibitem{Dev_Lug:1995} Luc Devroye and G{\'a}bor Lugosi. \newblock Lower bounds in pattern recognition and learning. \newblock {\em Pattern Recognition}, 28(7):1011--1018, 1995. \bibitem{Efr:1983} Bradley Efron. \newblock Estimating the error rate of a prediction rule: improvement on cross-validation. \newblock {\em J. Amer. Statist. Assoc.}, 78(382):316--331, 1983. \bibitem{Kol:2006} Vladimir Koltchinskii. \newblock Local {R}ademacher complexities and oracle inequalities in risk minimization. \newblock {\em Ann. Statist.}, 34(6):2593--2656, 2006. \bibitem{Lec:2007c} Guillaume Lecu{\'e}. \newblock Simultaneous adaptation to the margin and to complexity in classification. \newblock {\em Ann. Statist.}, 35(4):1698--1721, 2007. \bibitem{Lec:2007b} Guillaume Lecu{\'e}. \newblock Suboptimality of penalized empirical risk minimization in classification. \newblock In {\em COLT 2007}, volume 4539 of {\em Lecture Notes in Artificial Intelligence}. Springer, Berlin, 2007. \bibitem{Lug:2002} G{\'a}bor Lugosi. \newblock Pattern classification and learning theory. 
\newblock In {\em Principles of nonparametric learning (Udine, 2001)}, volume 434 of {\em CISM Courses and Lectures}, pages 1--56. Springer, Vienna, 2002. \bibitem{Lug_Weg:2004} G{\'a}bor Lugosi and Marten Wegkamp. \newblock Complexity regularization via localized random penalties. \newblock {\em Ann. Statist.}, 32(4):1679--1697, 2004. \bibitem{Mam_Tsy:1999} Enno Mammen and Alexandre~B. Tsybakov. \newblock Smooth discrimination analysis. \newblock {\em Ann. Statist.}, 27(6):1808--1829, 1999. \bibitem{Mas:2003:St-Flour} Pascal Massart. \newblock {\em Concentration inequalities and model selection}, volume 1896 of {\em Lecture Notes in Mathematics}. \newblock Springer, Berlin, 2007. \newblock Lectures from the 33rd Summer School on Probability Theory held in Saint-Flour, July 6--23, 2003. \bibitem{Mas_Ned:2003} Pascal Massart and {\'E}lodie N{\'e}d{\'e}lec. \newblock Risk bounds for statistical learning. \newblock {\em Ann. Statist.}, 34(5):2326--2366, 2006. \bibitem{Tsy:2004} Alexandre~B. Tsybakov. \newblock Optimal aggregation of classifiers in statistical learning. \newblock {\em Ann. Statist.}, 32(1):135--166, 2004. \bibitem{Tsy_vdG:2005} Alexandre~B. Tsybakov and Sara~A. van~de Geer. \newblock Square root penalty: adaptation to the margin in classification and in edge estimation. \newblock {\em Ann. Statist.}, 33(3):1203--1224, 2005. \bibitem{Vap:1998} Vladimir~N. Vapnik. \newblock {\em Statistical learning theory}. \newblock John Wiley \& Sons Inc., New York, 1998. \bibitem{vc-ucrfep-71} Vladimir~N. Vapnik and A.~Y. Chervonenkis. \newblock On the uniform convergence of relative frequencies of events to their probabilities. \newblock {\em Theory of Probability and its Applications}, 16(2):264--280, 1971. \end{thebibliography}