guide.bib

@inproceedings{Wolfe2008,
  author    = {Wolfe, J. and Haghighi, A. and Klein, D.},
  title     = {Fully Distributed {EM} for Very Large Datasets},
  booktitle = {Proc. of the 25th International Conference on Machine Learning},
  year      = {2008},
}

@inproceedings{Martins2013ACL,
  author    = {Martins, A.~F.~T. and Almeida, M.~B. and Smith, N.~A.},
  title     = {Turning on the Turbo: Fast Third-Order Non-Projective Turbo Parsers},
  booktitle = {Proc. of the Annual Meeting of the Association for Computational Linguistics},
  year      = 2013,
}

@incollection{Neal1998,
  author    = {Neal, Radford M. and Hinton, Geoffrey E.},
  title     = {A View of the {EM} Algorithm that Justifies Incremental, Sparse, and Other Variants},
  booktitle = {Learning in Graphical Models},
  publisher = {Springer},
  pages     = {355--368},
  year      = {1998},
}

@book{Duda2001,
  author    = {Duda, R. O. and Hart, P. E. and Stork, D. G.},
  title     = {Pattern Classification},
  edition   = {Second},
  publisher = {Wiley},
  address   = {New York},
  year      = {2001},
}

@book{Bishop2006,
  author    = {Bishop, C. M.},
  title     = {Pattern Recognition and Machine Learning},
  publisher = {Springer},
  address   = {New York},
  year      = {2006},
}

@book{Joachims2002,
  author    = {Joachims, T.},
  title     = {Learning to Classify Text Using Support Vector Machines: Methods, Theory and Algorithms},
  year      = 2002,
  publisher = {Kluwer Academic Publishers},
}

@book{Mitchell1997,
  author    = {Mitchell, T. M.},
  title     = {Machine Learning},
  publisher = {McGraw-Hill},
  year      = {1997},
}


@book{Cover1991,
  author    = {Cover, T. M. and Thomas, J. A.},
  title     = {Elements of Information Theory},
  publisher = {Wiley},
  year      = {1991},
}


@article{Jaynes1982,
  author    = {Jaynes, E. T.},
  title     = {On the Rationale of Maximum-Entropy Methods},
  journal   = {Proceedings of the IEEE},
  volume    = {70},
  number    = {9},
  pages     = {939--952},
  year      = {1982},
  publisher = {IEEE},
}

@article{Shannon1948,
  author  = {Shannon, C. E.},
  title   = {A Mathematical Theory of Communication},
  journal = {Bell System Technical Journal},
  volume  = {27},
  pages   = {379--423, 623--656},
  year    = {1948},
}


@article{PER-GRA:2007,
  author  = {P{\'e}rez, Fernando and Granger, Brian E.},
  title   = {{IPython}: a {System} for {Interactive} {Scientific} {Computing}},
  journal = {Computing in Science \& Engineering},
  volume  = {9},
  number  = {3},
  pages   = {21--29},
  month   = may,
  year    = 2007,
  url     = {http://ipython.scipy.org},
}


@misc{scipy,
  title  = {{SciPy}: Open source scientific tools for {Python}},
  author = {Jones, Eric and Oliphant, Travis and Peterson, Pearu and others},
  url    = {http://www.scipy.org/},
  year   = {2001--},
}

@phdthesis{Bottou1991,
  author = {Bottou, L.},
  title  = {Une Approche Th{\'e}orique de l'Apprentissage Connexionniste: Applications {\`a} la Reconnaissance de la Parole},
  school = {Universit{\'e} de Paris XI},
  year   = {1991},
}

@article{Johnson1998,
  author    = {Johnson, M.},
  title     = {{PCFG} Models of Linguistic Tree Representations},
  journal   = {Computational Linguistics},
  volume    = {24},
  number    = {4},
  pages     = {613--632},
  year      = {1998},
  publisher = {MIT Press},
}

@inproceedings{Klein2003,
  author       = {Klein, D. and Manning, C. D.},
  title        = {Accurate Unlexicalized Parsing},
  booktitle    = {Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics},
  pages        = {423--430},
  year         = {2003},
  organization = {Association for Computational Linguistics},
}

@phdthesis{Collins1999,
  author = {Collins, M.},
  title  = {Head-driven statistical models for natural language parsing},
  school = {University of Pennsylvania},
  year   = 1999,
}

@inproceedings{Magerman1995,
  author       = {Magerman, D. M.},
  title        = {Statistical Decision-Tree Models for Parsing},
  booktitle    = {Proceedings of the 33rd Annual Meeting of the Association for Computational Linguistics},
  pages        = {276--283},
  year         = {1995},
  organization = {Association for Computational Linguistics},
}

@inproceedings{Charniak1997,
  author    = {Charniak, E.},
  title     = {Statistical Parsing with a Context-Free Grammar and Word Statistics},
  booktitle = {Proceedings of the National Conference on Artificial Intelligence},
  pages     = {598--603},
  year      = {1997},
  publisher = {AAAI Press},
}

@inproceedings{Finkel2008,
  author    = {Finkel, J. R. and Kleeman, A. and Manning, C. D.},
  title     = {Efficient, Feature-Based, Conditional Random Field Parsing},
  booktitle = {Proceedings of ACL-08: HLT},
  pages     = {959--967},
  year      = {2008},
  publisher = {Association for Computational Linguistics},
}

@inproceedings{Taskar2004,
  author    = {Taskar, B. and Klein, D. and Collins, M. and Koller, D. and Manning, C.},
  title     = {Max-margin parsing},
  booktitle = {Proc. EMNLP},
  year      = 2004,
  pages     = {1--8},
}

@inproceedings{Petrov2008EMNLP,
  author    = {Petrov, Slav and Klein, Dan},
  title     = {Sparse Multi-Scale Grammars for Discriminative Latent Variable Parsing},
  booktitle = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing},
  month     = oct,
  year      = {2008},
  address   = {Honolulu, Hawaii},
  publisher = {Association for Computational Linguistics},
  pages     = {867--876},
  url       = {http://www.aclweb.org/anthology/D08-1091},
}

@inproceedings{Petrov2008NIPS,
  author    = {Petrov, Slav and Klein, Dan},
  title     = {Discriminative Log-Linear Grammars with Latent Variables},
  booktitle = {Advances in Neural Information Processing Systems 20 (NIPS)},
  editor    = {Platt, J. C. and Koller, D. and Singer, Y. and Roweis, S.},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {1153--1160},
  year      = {2008},
  url       = {http://books.nips.cc/papers/files/nips20/NIPS2007_0630.pdf},
}

@inproceedings{Petrov2007NAACL,
  author    = {Petrov, Slav and Klein, Dan},
  title     = {Improved Inference for Unlexicalized Parsing},
  booktitle = {Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference},
  month     = apr,
  year      = {2007},
  address   = {Rochester, New York},
  publisher = {Association for Computational Linguistics},
  pages     = {404--411},
  url       = {http://www.aclweb.org/anthology/N/N07/N07-1051},
}

@inproceedings{Charniak2006,
  author       = {Charniak, E. and Johnson, M. and Elsner, M. and Austerweil, J. and Ellis, D. and Haxton, I. and Hill, C. and Shrivaths, R. and Moore, J. and Pozar, M. and others},
  title        = {Multilevel Coarse-to-Fine {PCFG} Parsing},
  booktitle    = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages        = {168--175},
  year         = {2006},
  organization = {Association for Computational Linguistics},
}

@article{Ratnaparkhi1999,
  author    = {Ratnaparkhi, A.},
  title     = {Learning to parse natural language with maximum entropy models},
  journal   = {Machine Learning},
  year      = 1999,
  volume    = 34,
  number    = 1,
  pages     = {151--175},
  publisher = {Springer},
}


@inproceedings{Henderson2003,
  author       = {Henderson, J.},
  title        = {Inducing History Representations for Broad Coverage Statistical Parsing},
  booktitle    = {Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology},
  pages        = {24--31},
  year         = {2003},
  organization = {Association for Computational Linguistics},
}

@book{Chomsky1965,
  author    = {Chomsky, N.},
  title     = {Aspects of the Theory of Syntax},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1965},
}

@book{Tesniere1959,
  author    = {Tesni{\`e}re, L.},
  title     = {{\'E}l{\'e}ments de syntaxe structurale},
  publisher = {Librairie C. Klincksieck},
  year      = {1959},
}

@book{Hudson1984,
  author    = {Hudson, R. A.},
  title     = {Word Grammar},
  publisher = {Blackwell},
  address   = {Oxford},
  year      = {1984},
}

@book{Melcuk1988,
  author    = {Mel'{\v{c}}uk, I. A.},
  title     = {Dependency Syntax: Theory and Practice},
  publisher = {State University of New York Press},
  year      = {1988},
}

@article{Covington1990,
  author    = {Covington, M. A.},
  title     = {Parsing Discontinuous Constituents in Dependency Grammar},
  journal   = {Computational Linguistics},
  volume    = {16},
  number    = {4},
  pages     = {234--236},
  year      = {1990},
  publisher = {MIT Press},
}

@inproceedings{Eisner1996,
  author       = {Eisner, J. M.},
  title        = {Three New Probabilistic Models for Dependency Parsing: An Exploration},
  booktitle    = {Proceedings of the 16th International Conference on Computational Linguistics (COLING)},
  pages        = {340--345},
  year         = {1996},
  organization = {Association for Computational Linguistics},
}


@inproceedings{ratnaparkhi1996maximum,
  author    = {Ratnaparkhi, A.},
  title     = {A Maximum Entropy Model for Part-of-Speech Tagging},
  booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing},
  volume    = {1},
  pages     = {133--142},
  year      = {1996},
}

@book{Tutte1984,
  author    = {Tutte, W.},
  title     = {Graph Theory},
  publisher = {Addison-Wesley},
  address   = {Reading, MA},
  year      = {1984},
}

@inproceedings{DSmithSmith2007,
  author    = {Smith, D. A. and Smith, N. A.},
  title     = {Probabilistic Models of Nonprojective Dependency Trees},
  booktitle = {Proc. EMNLP-CoNLL},
  year      = {2007},
}


@inproceedings{Koo2007,
  author    = {Koo, T. and Globerson, A. and Carreras, X. and Collins, M.},
  title     = {Structured Prediction Models via the Matrix-Tree Theorem},
  booktitle = {Proc. EMNLP},
  year      = {2007},
}


@inproceedings{McDonald2007,
  author    = {McDonald, R. and Satta, G.},
  title     = {On the Complexity of Non-Projective Data-Driven Dependency Parsing},
  booktitle = {Proc. of IWPT},
  year      = 2007,
}

@inproceedings{McDonald2006CoNLL,
  title     = {Multilingual Dependency Analysis with a Two-Stage Discriminative Parser},
  author    = {McDonald, R. and Lerman, K. and Pereira, F.},
  booktitle = {Proc. of CoNLL},
  year      = 2006,
}

@inproceedings{DSmith2008,
  title     = {Dependency Parsing by Belief Propagation},
  author    = {Smith, D. A. and Eisner, J.},
  year      = 2008,
  booktitle = {Proc. of EMNLP},
}

@inproceedings{Martins2009ACL,
  title     = {Concise Integer Linear Programming Formulations for Dependency Parsing},
  author    = {Martins, A. F. T. and Smith, N. A. and Xing, E. P.},
  year      = 2009,
  booktitle = {Proc. of ACL-IJCNLP},
}

@inproceedings{Koo2010EMNLP,
  author    = {Koo, T. and Rush, A. M. and Collins, M. and Jaakkola, T. and Sontag, D.},
  title     = {Dual Decomposition for Parsing with Non-Projective Head Automata},
  booktitle = {Proc. of EMNLP},
  year      = {2010},
}

@inproceedings{Koo2010,
  author    = {Koo, T. and Collins, M.},
  title     = {Efficient Third-Order Dependency Parsers},
  booktitle = {Proc. of ACL},
  pages     = {1--11},
  year      = {2010},
}

@inproceedings{Nivre2006CoNLL,
  author    = {Nivre, J. and Hall, J. and Nilsson, J. and Eryi{\u{g}}it, G. and Marinov, S.},
  title     = {Labeled Pseudo-Projective Dependency Parsing with Support Vector Machines},
  booktitle = {Proc. of CoNLL},
  year      = {2006},
}

@inproceedings{Huang2010,
  author    = {Huang, L. and Sagae, K.},
  title     = {Dynamic Programming for Linear-Time Incremental Parsing},
  booktitle = {Proc. of ACL},
  pages     = {1077--1086},
  year      = {2010},
}

@inproceedings{Nivre2009,
  author       = {Nivre, J.},
  title        = {Non-Projective Dependency Parsing in Expected Linear Time},
  booktitle    = {Proceedings of the Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP},
  pages        = {351--359},
  year         = {2009},
  organization = {Association for Computational Linguistics},
}


@inproceedings{mccallum2000maximum,
  author    = {McCallum, A. and Freitag, D. and Pereira, F.},
  title     = {Maximum Entropy {Markov} Models for Information Extraction and Segmentation},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
  pages     = {591--598},
  year      = {2000},
  publisher = {Morgan Kaufmann},
}

@inproceedings{collins2002discriminative,
  author       = {Collins, M.},
  title        = {Discriminative Training Methods for Hidden {Markov} Models: Theory and Experiments with Perceptron Algorithms},
  booktitle    = {Proceedings of the 2002 Conference on Empirical Methods in Natural Language Processing},
  pages        = {1--8},
  year         = {2002},
  organization = {Association for Computational Linguistics},
}

@inproceedings{lafferty2001conditional,
  author    = {Lafferty, J. and McCallum, A. and Pereira, F.},
  title     = {Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data},
  booktitle = {Proc. of ICML},
  pages     = {282--289},
  year      = {2001},
}

@article{pennTreeBank,
  author    = {Marcus, M. P. and Marcinkiewicz, M. A. and Santorini, B.},
  title     = {Building a Large Annotated Corpus of {English}: The {Penn Treebank}},
  journal   = {Computational Linguistics},
  volume    = {19},
  number    = {2},
  pages     = {313--330},
  year      = {1993},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
}

@inproceedings{schutze1995distributional,
  author       = {Sch{\"u}tze, H.},
  title        = {Distributional Part-of-Speech Tagging},
  booktitle    = {Proceedings of the Seventh Conference of the European Chapter of the Association for Computational Linguistics},
  pages        = {141--148},
  year         = {1995},
  organization = {Morgan Kaufmann Publishers Inc.},
}

@article{merialdo1994tet,
  author    = {Merialdo, B.},
  title     = {Tagging {English} Text with a Probabilistic Model},
  journal   = {Computational Linguistics},
  volume    = {20},
  number    = {2},
  pages     = {155--171},
  year      = {1994},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
}

@inproceedings{clark03combining,
  title     = {Combining distributional and morphological information for part of speech induction},
  author    = {Clark, Alexander},
  year      = 2003,
  booktitle = {Proc. EACL},
}

@inproceedings{klein2004acl,
  title     = {Corpus-based Induction of Syntactic Structure: Models of Dependency and Constituency},
  author    = {Klein, D. and Manning, C.},
  year      = 2004,
  booktitle = {Proc. ACL},
}

@inproceedings{smith2006annealing,
  author    = {Smith, Noah A. and Eisner, Jason},
  title     = {Annealing Structural Bias in Multilingual Weighted Grammar Induction},
  booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and the 44th Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
  pages     = {569--576},
  location  = {Sydney, Australia},
  publisher = {Association for Computational Linguistics},
  address   = {Morristown, NJ, USA},
}


@inproceedings{graca2009nips,
  author    = {Gra{\c{c}}a, J. and Ganchev, K. and Pereira, F. and Taskar, B.},
  title     = {Parameter vs. Posterior Sparsity in Latent Variable Models},
  booktitle = {Proc. NIPS},
  year      = {2009},
}

@inproceedings{bergkirkpatrick2010naacl,
  author    = {Berg-Kirkpatrick, Taylor and Bouchard-C{\^o}t{\'e}, Alexandre and DeNero, John and Klein, Dan},
  title     = {Painless Unsupervised Learning with Features},
  booktitle = {Proc. NAACL},
  month     = jun,
  year      = {2010},
}

@phdthesis{JoaoThesis,
  author = {Gra{\c{c}}a, J.},
  title  = {Posterior Regularization Framework: Learning Tractable Models with Intractable Constraints},
  school = {Universidade T{\'e}cnica de Lisboa, Instituto Superior T{\'e}cnico},
  year   = {2010},
}

@inproceedings{blitzer2007biographies,
  author    = {Blitzer, J. and Dredze, M. and Pereira, F.},
  title     = {Biographies, {Bollywood}, Boom-Boxes and Blenders: Domain Adaptation for Sentiment Classification},
  booktitle = {Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics},
  pages     = {440--447},
  year      = {2007},
}

@inproceedings{lamar-EtAl:2010:Short,
  author    = {Lamar, Michael and Maron, Yariv and Johnson, Mark and Bienenstock, Elie},
  title     = {{SVD} and Clustering for Unsupervised {POS} Tagging},
  booktitle = {Proceedings of the ACL 2010 Conference: Short Papers},
  month     = jul,
  year      = {2010},
  address   = {Uppsala, Sweden},
  publisher = {Association for Computational Linguistics},
  pages     = {215--219},
}

@inproceedings{das-petrov:2011:ACL-HLT2011,
  author    = {Das, Dipanjan and Petrov, Slav},
  title     = {Unsupervised Part-of-Speech Tagging with Bilingual Graph-Based Projections},
  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  month     = jun,
  year      = {2011},
  address   = {Portland, Oregon, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {600--609},
  url       = {http://www.aclweb.org/anthology/P11-1061},
}

@article{brown94mathematic,
  author    = {Brown, Peter F. and Della Pietra, Stephen A. and Della Pietra, Vincent J. and Mercer, Robert L.},
  title     = {The Mathematics of Statistical Machine Translation: Parameter Estimation},
  journal   = {Computational Linguistics},
  volume    = {19},
  number    = {2},
  pages     = {263--311},
  year      = {1993},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
}

@inproceedings{charniak2009works,
  author       = {Charniak, E. and Elsner, M.},
  title        = {{EM} Works for Pronoun Anaphora Resolution},
  booktitle    = {Proceedings of the 12th Conference of the European Chapter of the Association for Computational Linguistics},
  pages        = {148--156},
  year         = {2009},
  organization = {Association for Computational Linguistics},
}

@inproceedings{johnson2007dtf,
  author    = {Johnson, M.},
  title     = {Why Doesn't {EM} Find Good {HMM} {POS}-Taggers?},
  booktitle = {Proc. EMNLP-CoNLL},
  year      = {2007},
}

@inproceedings{banko2004part,
  author    = {Banko, M. and Moore, R. C.},
  title     = {Part of Speech Tagging in Context},
  booktitle = {Proc. COLING},
  year      = {2004},
}


@inproceedings{wang2005improved,
  author    = {Wang, Q. I. and Schuurmans, D.},
  title     = {Improved Estimation for Unsupervised Part-of-Speech Tagging},
  booktitle = {Proc. IEEE NLP-KE},
  year      = {2005},
}

@inproceedings{smith2005acl,
  author       = {Smith, N. and Eisner, J.},
  title        = {Contrastive estimation: Training log-linear models on unlabeled data},
  booktitle    = {Proc. ACL},
  organization = {ACL},
  year         = 2005,
}

@inproceedings{ravi2009acl,
  author    = {Ravi, Sujith and Knight, Kevin},
  title     = {Minimized Models for Unsupervised Part-of-Speech Tagging},
  booktitle = {Proc. ACL},
  year      = {2009},
}


@inproceedings{McDonald2005b,
  author    = {McDonald, R. T. and Pereira, F. and Ribarov, K. and Haji{\v{c}}, J.},
  title     = {Non-Projective Dependency Parsing using Spanning Tree Algorithms},
  booktitle = {Proc. of HLT-EMNLP},
  year      = {2005},
}

@article{Tarjan1977,
  author    = {Tarjan, R. E.},
  title     = {Finding Optimum Branchings},
  journal   = {Networks},
  volume    = {7},
  number    = {1},
  pages     = {25--36},
  year      = {1977},
  publisher = {Wiley},
  address   = {New York},
}

@article{Chu1965,
  author  = {Chu, Y. J. and Liu, T. H.},
  title   = {On the Shortest Arborescence of a Directed Graph},
  journal = {Scientia Sinica},
  volume  = {14},
  pages   = {1396--1400},
  year    = {1965},
}

@article{Edmonds1967,
  author  = {Edmonds, J.},
  title   = {Optimum Branchings},
  journal = {Journal of Research of the National Bureau of Standards},
  volume  = {71B},
  pages   = {233--240},
  year    = 1967,
}

@inproceedings{Reichart09,
  author    = {Reichart, Roi and Rappoport, Ari},
  title     = {The {NVI} Clustering Evaluation Measure},
  booktitle = {Proc. CoNLL},
  year      = {2009},
  location  = {Boulder, Colorado},
}

@inproceedings{haghighi2006naacl,
  author       = {Haghighi, A. and Klein, D.},
  title        = {Prototype-Driven Learning for Sequence Models},
  booktitle    = {Proc. HLT-NAACL},
  year         = {2006},
  organization = {ACL},
}

@article{Meila07,
  author    = {Meil{\u{a}}, Marina},
  title     = {Comparing Clusterings---an Information Based Distance},
  journal   = {J. Multivar. Anal.},
  volume    = {98},
  number    = {5},
  pages     = {873--895},
  year      = {2007},
  issn      = {0047-259X},
  publisher = {Academic Press, Inc.},
  address   = {Orlando, FL, USA},
}

@inproceedings{RosenbergH07,
  author    = {Rosenberg, Andrew and Hirschberg, Julia},
  title     = {{V-Measure}: A Conditional Entropy-Based External Cluster Evaluation Measure},
  booktitle = {EMNLP-CoNLL},
  year      = {2007},
  pages     = {410--420},
}


@inproceedings{Klein2002,
  author       = {Klein, D. and Manning, C. D.},
  title        = {A Generative Constituent-Context Model for Improved Grammar Induction},
  booktitle    = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics},
  pages        = {128--135},
  year         = {2002},
  organization = {Association for Computational Linguistics},
}

@inproceedings{Smith2005,
  author    = {Smith, N. A. and Eisner, J.},
  title     = {Guiding Unsupervised Grammar Induction Using Contrastive Estimation},
  booktitle = {Proc. of IJCAI Workshop on Grammatical Inference Applications},
  year      = {2005},
}

@inproceedings{Cohen2008,
  author    = {Cohen, S. B. and Gimpel, K. and Smith, N. A.},
  title     = {Logistic Normal Priors for Unsupervised Probabilistic Grammar Induction},
  booktitle = {Proc. NIPS},
  year      = {2008},
}

@inproceedings{conll06st,
  author    = {Buchholz, S. and Marsi, E.},
  title     = {{CoNLL-X} Shared Task on Multilingual Dependency Parsing},
  booktitle = {Proc. of CoNLL},
  year      = 2006,
}

@inproceedings{Surdeanu2008,
  author    = {Surdeanu, M. and Johansson, R. and Meyers, A. and M{\`a}rquez, L. and Nivre, J.},
  title     = {The {CoNLL}-2008 Shared Task on Joint Parsing of Syntactic and Semantic Dependencies},
  booktitle = {Proc. of CoNLL},
  year      = {2008},
}

@inproceedings{Eisner1999,
  author    = {Eisner, J. and Satta, G.},
  title     = {Efficient Parsing for Bilexical Context-Free Grammars and Head Automaton Grammars},
  booktitle = {Proc. of ACL},
  year      = 1999,
}

@inproceedings{Carreras2007,
  author    = {Carreras, X.},
  title     = {Experiments with a Higher-Order Projective Dependency Parser},
  booktitle = {Proc. of CoNLL},
  year      = {2007},
}


@book{Vapnik1995,
  author    = {Vapnik, Vladimir N.},
  title     = {The Nature of Statistical Learning Theory},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {1995},
}

@book{Hopcroft1979,
  author    = {Hopcroft, J. E. and Ullman, J. D.},
  title     = {Introduction to Automata Theory, Languages, and Computation},
  publisher = {Addison-Wesley},
  address   = {Reading, MA},
  year      = {1979},
}

@article{Hunter:2007,
  author      = {Hunter, John D.},
  title       = {{Matplotlib}: A {2D} Graphics Environment},
  journal     = {Computing in Science \& Engineering},
  volume      = {9},
  number      = {3},
  pages       = {90--95},
  month       = may # "--" # jun,
  year        = {2007},
  publisher   = {IEEE COMPUTER SOC},
  type        = {Editorial Material},
  times-cited = {21},
}
        

@book{Manning2008,
  author    = {Manning, C. D. and Raghavan, P. and Sch{\"u}tze, H.},
  title     = {Introduction to Information Retrieval},
  publisher = {Cambridge University Press},
  address   = {Cambridge, UK},
  year      = {2008},
}
        
@inproceedings{McCallum1998,
  author    = {McCallum, A. and Nigam, K.},
  title     = {A Comparison of Event Models for Naive {Bayes} Text Classification},
  booktitle = {AAAI-98 Workshop on Learning for Text Categorization},
  pages     = {41--48},
  year      = {1998},
}

@book{bertsekas1995np,
  author    = {Bertsekas, D. P.},
  title     = {Nonlinear Programming},
  publisher = {Athena Scientific},
  year      = {1995},
}

@book{boyd2004convex,
  author    = {Boyd, S. P. and Vandenberghe, L.},
  title     = {Convex Optimization},
  publisher = {Cambridge University Press},
  year      = {2004},
}

@book{Nocedal1999,
  author    = {Nocedal, J. and Wright, S. J.},
  title     = {Numerical Optimization},
  publisher = {Springer-Verlag},
  year      = {1999},
}

@book{Schoelkopf2002,
  title     = {Learning with Kernels},
  author    = {Sch{\"o}lkopf, B. and Smola, A. J.},
  year      = 2002,
  publisher = {The MIT Press},
  address   = {Cambridge, MA},
  url       = {http://www.learning-with-kernels.org},
}

@book{ShaweTaylor2004,
  author    = {Shawe-Taylor, John and Cristianini, Nello},
  title     = {Kernel Methods for Pattern Analysis},
  publisher = {Cambridge University Press},
  month     = jun,
  year      = {2004},
}

@inproceedings{Boser1992,
  author       = {Boser, B. E. and Guyon, I. M. and Vapnik, V. N.},
  title        = {A Training Algorithm for Optimal Margin Classifiers},
  booktitle    = {Proceedings of the Fifth Annual Workshop on Computational Learning Theory},
  pages        = {144--152},
  year         = {1992},
  organization = {ACM},
  address      = {New York, NY, USA},
}

@article{Cortes1995,
  author    = {Cortes, C. and Vapnik, V.},
  title     = {Support-Vector Networks},
  journal   = {Machine Learning},
  volume    = {20},
  number    = {3},
  pages     = {273--297},
  year      = {1995},
  publisher = {Springer},
}


@article{Crammer2006,
  author    = {Crammer, K. and Dekel, O. and Keshet, J. and Shalev-Shwartz, S. and Singer, Y.},
  title     = {Online Passive-Aggressive Algorithms},
  journal   = {Journal of Machine Learning Research},
  volume    = {7},
  pages     = {551--585},
  year      = {2006},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
}


@article{Crammer2002,
  author    = {Crammer, K. and Singer, Y.},
  title     = {On the Algorithmic Implementation of Multiclass Kernel-Based Vector Machines},
  journal   = {Journal of Machine Learning Research},
  volume    = {2},
  pages     = {265--292},
  year      = {2002},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
}


@inproceedings{ShalevShwartz2007ICML,
  author    = {Shalev-Shwartz, S. and Singer, Y. and Srebro, N.},
  title     = {{Pegasos}: Primal Estimated Sub-Gradient Solver for {SVM}},
  booktitle = {ICML},
  year      = {2007},
}


@article{Rosenblatt1958,
  author    = {Rosenblatt, F.},
  title     = {The Perceptron: A Probabilistic Model for Information Storage and Organization in the Brain},
  journal   = {Psychological Review},
  volume    = {65},
  number    = {6},
  pages     = {386--408},
  year      = {1958},
  publisher = {American Psychological Association},
}


@article{rabiner,
  author  = {Rabiner, L. R.},
  title   = {A Tutorial on Hidden {Markov} Models and Selected Applications in Speech Recognition},
  journal = {Proceedings of the IEEE},
  volume  = {77},
  number  = {2},
  pages   = {257--286},
  year    = {1989},
}

@book{Manning1999,
  author    = {Manning, C. D. and Sch{\"u}tze, H.},
  title     = {Foundations of Statistical Natural Language Processing},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1999},
}