@ARTICLE{clarksakas11,
AUTHOR = {Alexander Clark and William Gregory Sakas},
TITLE = {Computational Models of First Language Acquisition},
JOURNAL = {Research on Language and Computation},
VOLUME = {8},
NUMBER = {2-3},
PAGES = {101-106},
YEAR = 2011
}
@INPROCEEDINGS{clark11mol,
AUTHOR = {Alexander Clark},
TITLE = {A language theoretic approach to syntactic structure},
BOOKTITLE = {Proceedings of the 12th Meeting on the Mathematics of Language (MOL)},
YEAR = 2011,
ADDRESS = {Nara, Japan},
ABSTRACT = {We consider the idea of defining syntactic structure relative to a language, rather than to a grammar for a language. This allows us to define a notion of hierarchical structure that is independent of the particular grammar, and that depends rather on the properties of various algebraic structures canonically associated with a language.
Our goal is not necessarily to recover the traditional ideas of syntactic structure invented by linguists,
but rather to come up with an objective notion of syntactic structure that can be used for semantic interpretation. The role of syntactic structure is to bring together words and constituents that are apart on the surface, so they can be combined appropriately.
The approach is based on identifying concatenation operations which are non-trivial and using these to constrain the allowable local trees in a structural description.},
URL = {papers/mol12.pdf},
NOTE = {to appear}
}
@INPROCEEDINGS{clark11itg,
AUTHOR = {Alexander Clark},
TITLE = {Inference of Inversion Transduction Grammars},
ABSTRACT = {We present the first polynomial algorithm for learning a class of inversion transduction grammars (ITGs) that implement context free transducers -- functions from strings to strings. The class of transductions that we can learn properly includes all subsequential transductions.
These algorithms are based on a generalisation of distributional learning; we prove correctness of our algorithm under an identification in the limit model. },
BOOKTITLE = {Proceedings of ICML},
YEAR = 2011,
ADDRESS = {Bellevue, Washington},
URL = {papers/icmlTotal.pdf}
}
@ARTICLE{glowacka2011introduction,
TITLE = {Introduction to the Special Topic on Grammar Induction, Representation of Language and Language Learning},
AUTHOR = {G{\l}owacka, D. and Shawe-Taylor, J. and Clark, A. and de la Higuera, C. and Johnson, M.},
JOURNAL = {Journal of Machine Learning Research},
VOLUME = {12},
PAGES = {1425--1428},
YEAR = {2011}
}
@ARTICLE{springerlink:10.1007/s10994-010-5218-3,
AUTHOR = {Clark, Alexander and Costa Florêncio, Christophe and Watkins, Chris},
AFFILIATION = {Department of Computer Science, Royal Holloway, University of London, Egham, TW20 0EX UK},
TITLE = {Languages as hyperplanes: grammatical inference with string kernels},
JOURNAL = {Machine Learning},
PUBLISHER = {Springer Netherlands},
ISSN = {0885-6125},
KEYWORD = {Computer Science},
PAGES = {351-373},
VOLUME = {82},
ISSUE = {3},
URL = {http://dx.doi.org/10.1007/s10994-010-5218-3},
NOTE = {10.1007/s10994-010-5218-3},
ABSTRACT = {Using string kernels, languages can be represented as hyperplanes in a high dimensional feature space. We discuss the language-theoretic properties of this formalism with particular reference to the implicit feature maps defined by string kernels, considering the expressive power of the formalism, its closure properties and its relationship to other formalisms. We present a new family of grammatical inference algorithms based on this idea. We demonstrate that some mildly context-sensitive languages can be represented in this way and that it is possible to efficiently learn these using kernel PCA. We experimentally demonstrate the effectiveness of this approach on some standard examples of context-sensitive languages using small synthetic data sets.},
YEAR = {2011}
}
@INCOLLECTION{clarklappin11handbook,
AUTHOR = {Alexander Clark and Shalom Lappin},
YEAR = {2011},
TITLE = {Computational Learning Theory and Language Acquisition},
BOOKTITLE = {Handbook of Philosophy of Linguistics},
PUBLISHER = {Elsevier},
EDITOR = {R. Kempson and N. Asher and T. Fernando},
URL = {papers/clarklappinphil.pdf},
NOTE = {forthcoming}
}
@ARTICLE{clark10jmlr,
AUTHOR = {Alexander Clark and R\'emi Eyraud and Amaury Habrard},
TITLE = {Using Contextual Representations to Efficiently Learn Context-Free Languages},
JOURNAL = {Journal of Machine Learning Research},
YEAR = 2010,
VOLUME = 11,
PAGES = {2707--2744},
URL = {papers/clark10a.pdf},
ABSTRACT = {We present a polynomial update time algorithm for the inductive inference of a large class of context-free languages using the paradigm of positive data and a membership oracle. We achieve this result by moving to a novel representation, called Contextual Binary Feature Grammars (CBFGs), which are capable of representing richly structured context-free languages as well as some context sensitive languages. These representations explicitly model the lattice structure of the distribution of a set of substrings and can be inferred using a generalisation of distributional learning. This formalism is an attempt to bridge the gap between simple learnable classes and the sorts of highly expressive representations necessary for linguistic representation: it allows the learnability of a large class of context-free languages, that includes all regular languages and those context-free languages that satisfy two simple constraints. The formalism and the algorithm seem well suited to natural language and in particular to the modeling of first language acquisition. Preliminary experimental results confirm the effectiveness of this approach.},
MONTH = OCT
}
@INPROCEEDINGS{clarkyoshinaka10fg,
AUTHOR = {Ryo Yoshinaka and Alexander Clark},
TITLE = {Polynomial Time Learning of Some Multiple Context-Free Languages with a Minimally Adequate Teacher},
BOOKTITLE = {Proceedings of the 15th Conference on Formal Grammar},
YEAR = 2010,
ADDRESS = {Copenhagen, Denmark},
ABSTRACT = {We present an algorithm for the inference of some Multiple Context-Free Grammars
from Membership and Equivalence Queries, using the Minimally Adequate Teacher model of Angluin.
This is an extension of the congruence based methods for learning some Context-Free Grammars
proposed by Clark (ICGI 2010). We define the natural extension of the syntactic congruence to tuples of strings, and demonstrate we can efficiently learn the class of Multiple Context-Free Grammars where the non-terminals correspond to the congruence classes under this relation.},
PDF = {papers/MCFLs_MAT.pdf}
}
@INCOLLECTION{clark10alt,
AUTHOR = {Alexander Clark},
TITLE = {Towards General Algorithms for Grammatical Inference},
NOTE = {Invited Paper},
BOOKTITLE = {Proceedings of ALT},
YEAR = 2010,
ADDRESS = {Canberra, Australia},
PUBLISHER = {Springer},
MONTH = {October},
URL = {papers/alt2010.pdf},
PAGES = {11-30},
ABSTRACT = {Many algorithms for grammatical inference can be viewed as instances of a more general algorithm which maintains a set of primitive elements, which distributionally define sets of strings, and a set of features or tests that constrain various inference rules.
Using this general framework, which we cast as a process of logical inference,
we re-analyse Angluin's famous {\sc lstar} algorithm
and several recent algorithms for the inference of context-free grammars and multiple context-free grammars.
Finally, to illustrate the advantages of this approach, we extend it to the
inference of functional transductions from positive data only, and we present a new algorithm for the inference of finite state transducers.}
}
@INCOLLECTION{clark10cfgscp,
AUTHOR = {Alexander Clark},
TITLE = {Learning Context Free Grammars with the Syntactic Concept Lattice},
BOOKTITLE = {Grammatical Inference: Theoretical Results and Applications. Proceedings of the International Colloquium on Grammatical Inference},
PAGES = {38-51},
PUBLISHER = {Springer},
YEAR = 2010,
EDITOR = {Jos\'e Sempere and Pedro Garcia},
NUMBER = 6339,
PDF = {papers/icgi10scp.pdf},
SERIES = {Lecture Notes in Computer Science},
ABSTRACT = {The Syntactic Concept Lattice is a residuated lattice based on the distributional structure of a language; the natural representation based on this is a context sensitive formalism.
Here we examine the possibility of basing a context free grammar ({\sc cfg}) on the structure of this lattice; in particular by choosing non-terminals to correspond to concepts in this lattice.
We present a learning algorithm for context free grammars which uses positive data and membership queries, and prove its correctness under the identification in the limit paradigm.
Since the lattice itself may be infinite, we consider only a polynomially bounded subset of the set of concepts, in order to get an efficient algorithm.
We compare this on the one hand to learning algorithms for context free grammars,
where the non-terminals correspond to congruence classes,
and on the other hand to the use of context sensitive techniques such as Binary Feature Grammars and Distributional Lattice Grammars.
The class of {\sc cfg}s that can be learned in this way includes inherently ambiguous and thus non-deterministic languages; this approach therefore breaks through an important barrier in {\sc cfg} inference.}
}
@INCOLLECTION{clark10matcfg,
AUTHOR = {Alexander Clark},
TITLE = {Distributional Learning of some Context-free Languages with a Minimally Adequate Teacher},
BOOKTITLE = {Grammatical Inference: Theoretical Results and Applications. Proceedings of the International Colloquium on Grammatical Inference},
PAGES = {24-37},
PUBLISHER = {Springer},
YEAR = 2010,
EDITOR = {Jos\'e Sempere and Pedro Garcia},
NUMBER = 6339,
SERIES = {Lecture Notes in Computer Science},
ADDRESS = {Valencia, Spain},
MONTH = {September},
ABSTRACT = {Angluin showed that the class of regular languages could be learned from a
Minimally Adequate Teacher (MAT) providing membership and equivalence queries.
Clark and Eyraud (2007) showed that some context free grammars can be identified in the limit from positive data alone by identifying the congruence classes of the language. In this paper we consider learnability of context free languages using a MAT. We show that there is a natural class of context free languages, that includes the class of regular languages, that can be polynomially learned from a MAT, using an algorithm that is an extension of
Angluin's LSTAR algorithm.}
}
@PROCEEDINGS{DBLP:conf/icgi/2008,
EDITOR = {Alexander Clark and
Fran\c{c}ois Coste and
Laurent Miclet},
TITLE = {Grammatical Inference: Algorithms and Applications, 9th
International Colloquium, ICGI 2008, Saint-Malo, France,
September 22-24, 2008, Proceedings},
BOOKTITLE = {ICGI},
PUBLISHER = {Springer},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {5278},
YEAR = {2008},
ISBN = {978-3-540-88008-0},
URL = {http://www.springer.com/computer/ai/book/978-3-540-88008-0}
}
@INPROCEEDINGS{clark10conll,
AUTHOR = {Alexander Clark},
TITLE = {Efficient, Correct, Unsupervised Learning of Context-Sensitive Languages},
BOOKTITLE = {Proceedings of the Fourteenth Conference on Computational Natural Language Learning},
YEAR = 2010,
MONTH = {July},
PAGES = {28--37},
ADDRESS = {Uppsala, Sweden},
ORGANIZATION = {Association for Computational Linguistics},
URL = {papers/conll2010.pdf},
ABSTRACT = {A central problem for NLP is grammar induction: the development of unsupervised learning algorithms for syntax. In this paper we
present a lattice-theoretic representation for natural language syntax, called Distributional Lattice Grammars.
These representations are objective or empiricist, based on a generalisation of distributional learning, and are capable of representing all regular languages, some but not all context-free languages and some non-context-free languages. We present a simple algorithm for learning these grammars together with
a complete self-contained proof of the correctness and efficiency of the algorithm.}
}
@BOOK{clark10handbook,
EDITOR = {Alexander Clark and Chris Fox and Shalom Lappin},
TITLE = {The Handbook of Computational Linguistics and Natural Language Processing},
PUBLISHER = {Wiley-Blackwell},
YEAR = 2010,
URL = {http://eu.wiley.com/WileyCDA/WileyTitle/productCd-1405155817.html}
}
@BOOK{clarklappinPoverty,
TITLE = {{Linguistic Nativism and the Poverty of the Stimulus}},
AUTHOR = {Alexander Clark and Shalom Lappin},
YEAR = {2011},
PUBLISHER = {Wiley-Blackwell},
URL = {http://eu.wiley.com/WileyCDA/WileyTitle/productCd-1405187840.html}
}
@INCOLLECTION{clarklappin10chapter,
AUTHOR = {Alexander Clark and Shalom Lappin},
TITLE = {Unsupervised Learning and Grammar Induction},
BOOKTITLE = {The Handbook of Computational Linguistics and Natural Language Processing},
PAGES = {197-220},
PUBLISHER = {Wiley-Blackwell},
YEAR = 2010,
EDITOR = {Alexander Clark and Chris Fox and Shalom Lappin}
}
@INCOLLECTION{clark10lata,
AUTHOR = {Alexander Clark},
TITLE = {Three learnable models for the description of language},
BOOKTITLE = {Language and Automata Theory and Applications, Fourth International Conference, LATA 2010},
YEAR = 2010,
PAGES = {16--31},
EDITOR = {Adrian-Horia Dediu, Henning Fernau, Carlos Martín-Vide},
URL = {papers/threeLearnableLATA.pdf},
ABSTRACT = {Learnability is a vital property of formal grammars:
representation classes should be defined in such a way that they are learnable. One way to build learnable representations is by making them objective or empiricist: the structure of the representation should be based on the structure of the language. Rather than defining a function from representation to language we should start by defining a function from the language to the representation:
following this strategy gives classes of representations that are easy to learn.
We illustrate this approach with three classes, defined in analogy to the lowest three levels of the Chomsky hierarchy.
First, we recall the canonical deterministic finite automaton, where the states of the automaton correspond to the right congruence classes of the language.
Secondly, we define context free grammars where the non-terminals of the grammar correspond to the syntactic congruence classes, and where the
productions are defined by the syntactic monoid;
finally we define a residuated lattice structure from the Galois connection between strings and contexts, which we call the syntactic concept lattice,
and base a representation on this, which
allows us to define a class of languages that includes some non-context free languages, many context-free languages and all regular languages.
All three classes are efficiently learnable under suitable learning paradigms.},
SERIES = {LNCS},
PUBLISHER = {Springer}
}
@INPROCEEDINGS{clark09clagi,
AUTHOR = {Alexander Clark and R\'emi Eyraud and Amaury Habrard},
TITLE = {A note on Contextual Binary Feature Grammars},
BOOKTITLE = {EACL 2009 workshop on
Computational Linguistic Aspects of Grammatical Inference},
YEAR = 2009,
ADDRESS = {Athens, Greece},
ORGANIZATION = {ACL},
URL = {papers/W09-1006.pdf},
ABSTRACT = {Contextual Binary Feature Grammars
were recently proposed by (Clark et al.,
2008) as a learnable representation for
richly structured context-free and context
sensitive languages. In this paper
we examine the representational
power of the formalism, its relationship
to other standard formalisms and language
classes, and its appropriateness
for modelling natural language.}
}
@INPROCEEDINGS{clark09fg,
AUTHOR = {Alexander Clark},
TITLE = {A learnable representation for syntax using residuated lattices},
BOOKTITLE = {Proceedings of the 14th Conference on Formal Grammar},
URL = {papers/alexcFG2009.pdf},
YEAR = 2009,
ADDRESS = {Bordeaux, France},
ABSTRACT = {We propose a representation for natural language syntax based on
the theory of residuated lattices: in particular on the Galois lattice between contexts and substrings, which we call the syntactic concept lattice.
The natural representation derived from this is a richly structured
context sensitive formalism that can be learned using a generalisation of distributional learning. In this paper we define the basic algebraic properties of the syntactic concept lattice,
together with a representation derived from this lattice and
discuss the generative power of the formalism. We establish some basic results which show that these representations, because they are defined language theoretically, can be inferred from information about the set of grammatical strings of the language.
We also discuss the relation to other grammatical formalisms notably categorial grammar and context free grammars.
We claim that this lattice based formalism is plausibly both learnable from evidence about the grammatical
strings of a language and may be powerful enough to represent natural languages, and thus presents a potential solution to the central problem of theoretical linguistics.}
}
@INPROCEEDINGS{clarklappin09,
AUTHOR = {Clark, Alexander and Lappin, Shalom},
TITLE = {Another look at indirect negative evidence},
BOOKTITLE = {Proceedings of the EACL Workshop on Cognitive Aspects of
Computational Language Acquisition},
YEAR = 2009,
ADDRESS = {Athens},
URL = {papers/negativeClarkLappin.pdf},
MONTH = {March},
ABSTRACT = {Indirect negative evidence is clearly an important way for learners to
constrain overgeneralisation, and yet a good learning theoretic
analysis has yet to be provided for this, whether in a PAC or a
probabilistic identification in the limit framework. In this paper we suggest a
theoretical analysis of indirect negative evidence that allows the
presence of ungrammatical strings in the input
and also accounts for the relationship between
grammaticality/acceptability and probability.
Given independently justified assumptions about lower bounds on the
probabilities of grammatical strings, we establish that a limited
number of membership queries of some strings can be probabilistically
simulated.}
}
@INPROCEEDINGS{clark08bfg,
AUTHOR = {Clark, Alexander and Eyraud, R\'emi and Habrard, Amaury},
TITLE = {A Polynomial Algorithm for the Inference of Context Free Languages},
BOOKTITLE = {Proceedings of International Colloquium on Grammatical Inference},
YEAR = 2008,
MONTH = {September},
URL = {papers/clarkBFGIcgi08.pdf},
PAGES = {29--42},
PUBLISHER = {Springer},
ABSTRACT = {We present a polynomial algorithm for the inductive inference of a
large class of context free languages, that includes all regular
languages.
The algorithm uses a representation which we call
Binary Feature Grammars based on a set of features, capable of
representing richly
structured context free languages as well as some context sensitive
languages.
More precisely, we focus on a particular case of this representation
where the features correspond to contexts appearing in the language.
Using the paradigm of positive data and a membership oracle, we can
establish that all context free languages that satisfy two
constraints on the context distributions can be identified in the
limit by this approach. The polynomial time algorithm we propose is based on a
generalisation of distributional learning and uses the lattice of context
occurrences.
The formalism and the algorithm seem well suited to natural language
and in particular to the modelling of first language acquisition.
}
}
@ARTICLE{clark08parikh,
AUTHOR = {Alexander Clark and Chris Watkins},
TITLE = {Some Alternatives to {Parikh} Matrices Using String Kernels},
JOURNAL = {Fundamenta Informaticae},
YEAR = 2008,
VOLUME = 84,
NUMBER = {3--4},
PAGES = {291-303},
ABSTRACT = {We describe methods of representing strings as real valued vectors or matrices; we show how to integrate two separate lines of enquiry: string kernels, developed in machine learning, and Parikh matrices, which have been studied intensively over the last few years as a powerful tool in the study of combinatorics over words. In the field of machine learning, there is widespread use of string kernels, which use analogous mappings into high dimensional feature spaces based on the occurrences of subwords or factors. In this paper we show how one can use string kernels to construct two alternatives to Parikh matrices, that overcome some of the limitations of the Parikh matrix construction. These are morphisms from the free monoid to rings of real-valued matrices under multiplication: one is based on the subsequence kernel and the other on the gap-weighted string kernel. For the latter kernel we demonstrate that for many values of the gap-weight hyper-parameter the resulting morphism is injective.}
}
@ARTICLE{clark07a,
AUTHOR = {Clark, Alexander and Eyraud, R\'emi},
TITLE = {Polynomial Identification in the Limit of Substitutable Context-free Languages},
JOURNAL = {Journal of Machine Learning Research},
YEAR = {2007},
VOLUME = 8,
PAGES = {1725--1745},
MONTH = {August},
URL = {papers/clark07a.pdf},
ABSTRACT = {This paper formalises the idea of substitutability introduced by
Zellig Harris in the 1950s and makes it the basis for a learning
algorithm from positive data only for a subclass of context-free
languages. We show that there is a polynomial characteristic set, and
thus prove polynomial identification in the limit of this class. We
discuss the relationship of this class of languages to other common
classes discussed in grammatical inference. It transpires that it is
not necessary to identify constituents in order to learn a
context-free language -- it is sufficient to identify the syntactic
congruence, and the operations of the syntactic monoid can be
converted into a context-free grammar. We also discuss modifications
to the algorithm that produces a reduction system rather than a
context-free grammar, that will be much more compact. We discuss the
relationship to Angluin's notion of reversibility for regular
languages. We also demonstrate that an implementation of this
algorithm is capable of learning a classic example of structure
dependent syntax in English: this constitutes a refutation of an
argument that has been used in support of nativist theories of
language.}
}
@INPROCEEDINGS{clark06ecml,
AUTHOR = {Clark, Alexander and Costa Flor\^{e}ncio, Christophe and Watkins, Chris},
TITLE = {Languages as Hyperplanes: grammatical inference with string kernels},
BOOKTITLE = {Proceedings of the European Conference on Machine Learning ({ECML})},
YEAR = 2006,
ABSTRACT = {Using string kernels, languages can be represented as hyperplanes in a high dimensional feature space.
We present a new family
of grammatical inference algorithms based on this idea.
We demonstrate that some mildly context sensitive languages can be represented in this way and it is possible to efficiently learn these using kernel PCA.
We present some experiments demonstrating the effectiveness of this approach on some standard examples of context sensitive languages using small synthetic data sets.},
PAGES = { 90-101},
URL = {papers/ecmlGiskFinal.pdf}
}
@INPROCEEDINGS{clark06d,
AUTHOR = {Alexander Clark},
TITLE = {Large scale inference of deterministic transductions: {Tenjinno} problem 1},
BOOKTITLE = {Proceedings of the 8th International Colloquium on Grammatical Inference ({ICGI})},
PAGES = {227-239},
ABSTRACT = {We discuss the problem of large scale grammatical inference in the context of
the Tenjinno competition, with reference to the inference of deterministic
finite state transducers, and discuss the design of the algorithms and the design and implementation of the program that solved the first problem.
Though the OSTIA algorithm has good asymptotic guarantees for this class of problems, the amount of data required is prohibitive. We therefore developed a new strategy for inferring large scale transducers that is more adapted for large random instances of the type in question, which involved
combining traditional state merging algorithms for inference of finite state automata with EM based alignment algorithms and state splitting algorithms.
},
URL = {papers/42010227.pdf},
YEAR = 2006
}
@ARTICLE{clark07,
AUTHOR = {Alexander Clark},
TITLE = {Learning Deterministic Context Free Grammars: the {Omphalos} Competition},
JOURNAL = {Machine Learning},
YEAR = 2007,
VOLUME = 66,
NUMBER = 1,
PAGES = {93-110},
MONTH = {January},
ABSTRACT = {This paper describes the winning entry to the Omphalos context free grammar learning competition. We describe a context-free grammatical inference algorithm operating on positive data only, which integrates an information theoretic constituent likelihood measure together with more traditional heuristics based on substitutability and frequency. The competition is discussed from the perspective of a competitor. We discuss a class of deterministic grammars, the Non-terminally Separated (NTS) grammars, that have a property relied on by our algorithm, and consider the possibilities of extending the algorithm to larger classes of languages.},
URL = {papers/omphalos.pdf}
}
@INPROCEEDINGS{clark06c,
AUTHOR = {Alexander Clark},
TITLE = {{PAC}-learning unambiguous {NTS} languages},
BOOKTITLE = {Proceedings of the 8th International Colloquium on Grammatical Inference ({ICGI})},
PAGES = {59-71},
URL = {papers/42010059.pdf},
ABSTRACT = {Non-terminally separated (NTS) languages are a subclass of
deterministic context free languages where there is a stable relationship
between the substrings of the language and the non-terminals of the
grammar.
We show that when the distribution of samples is generated by a PCFG,
based on the same grammar as the target language,
the class of unambiguous NTS languages is PAC-learnable from
positive data alone, with
polynomial bounds on data and computation.
},
YEAR = 2006
}
@INPROCEEDINGS{clark06b,
AUTHOR = {Clark, Alexander and Costa Flor\^{e}ncio, Christophe and Watkins, Chris and Serayet, Mariette},
TITLE = {Planar Languages and Learnability},
BOOKTITLE = {Proceedings of the 8th International Colloquium on Grammatical Inference ({ICGI})},
PAGES = {148-160},
URL = {papers/42010148.pdf},
YEAR = 2006,
ABSTRACT = {Strings can be mapped into Hilbert spaces using feature maps such as
the Parikh map. Languages can then be defined as the pre-image of
hyperplanes in the feature space, rather than using grammars or
automata. These are the \emph{planar} languages.
In this paper we show that using techniques from
kernel-based learning, we can represent and efficiently learn, from
positive data alone, various linguistically interesting context-sensitive
languages. In particular we show that the cross-serial dependencies in
Swiss German, that established the non-context-freeness of natural
language, are learnable using a standard kernel.
We demonstrate the polynomial-time identifiability in
the limit of these classes, and discuss some language theoretic
properties of these classes, and their relationship to the choice of
kernel/feature map.}
}
@INPROCEEDINGS{clark06a,
AUTHOR = {Alexander Clark and Remi Eyraud},
TITLE = {Learning Auxiliary Fronting with Grammatical Inference},
BOOKTITLE = {Proceedings of {CoNLL}},
YEAR = 2006,
ADDRESS = {New York},
PAGES = {125-132},
URL = {papers/conllClark.pdf},
ABSTRACT = {We present a simple context-free grammatical inference algorithm,
and prove that it is capable of learning an interesting subclass of
context-free
languages. We also demonstrate that an implementation of this algorithm is
capable of learning auxiliary fronting in polar interrogatives (AFIPI) in English.
This has been
one of the most important test cases in language acquisition over the last few
decades.
We demonstrate that learning can proceed even in the complete absence of examples of particular constructions, and thus that debates about the frequency of occurrence of such constructions are irrelevant.
We discuss the implications of this on the type of innate learning
biases that must be hypothesized to explain first language acquisition. }
}
@INPROCEEDINGS{georgescul06a,
AUTHOR = {Georgescul, Maria and Clark, Alexander and Armstrong, Susan},
TITLE = {Word Distributions for Thematic Segmentation in a Support Vector Machine Approach},
BOOKTITLE = {Proceedings of {CoNLL}},
PAGES = {101-108},
YEAR = 2006,
ADDRESS = {New York},
ABSTRACT = {We investigate the appropriateness of using a technique based on support vector machines for identifying thematic structure of text streams. The thematic segmentation task is modeled as a binary-classification problem, where the different classes correspond to the presence or the absence of a thematic boundary. Experiments are conducted with this approach by using features based on word distributions through text. We provide empirical evidence that our approach is robust, by showing good performance on three different data sets. In particular, substantial improvement is obtained over previously published results of word-distribution based systems when evaluation is done on a corpus of recorded and transcribed multi-party dialogs.},
URL = {papers/conllGeorgescul.pdf}
}
@INPROCEEDINGS{georgescul06b,
AUTHOR = {Georgescul, Maria and Clark, Alexander and Armstrong, Susan},
TITLE = { An Analysis of Methodological and Quantitative Aspects
in the Evaluation of Thematic Segmentation Algorithms},
BOOKTITLE = {Proceedings of {SIGDIAL}},
YEAR = 2006,
ADDRESS = {Sydney, Australia},
PDF = {papers/W06-1320.pdf},
ABSTRACT = {We consider here the task of linear thematic segmentation of text documents, by using features based on word distributions in the text. For this task, a typical and often implicit assumption in previous studies is that a document has just one topic and therefore many algorithms have been tested and have shown encouraging results on artificial data sets, generated by putting together parts of different documents. We show that evaluation on synthetic data is potentially misleading and fails to give an accurate evaluation of the performance on real data. Moreover, we provide a critical review of existing evaluation metrics in the literature and we propose an improved evaluation metric.}
}
@INPROCEEDINGS{clark03shallow,
AUTHOR = {Alexander Clark},
TITLE = {Pre-processing very noisy text},
BOOKTITLE = {Proceedings of Workshop on Shallow Processing of Large Corpora},
YEAR = 2003,
SERIES = {Corpus Linguistics},
ABSTRACT = {Existing techniques for tokenisation and sentence boundary identification are extremely accurate whenthe data is perfectly clean (Mikheev, 2002), and have been applied successfully to corpora of news feeds and other post-edited corpora. Informal written texts are readily available, and with the growth ofother informal text modalities (IRC, ICQ, SMS etc.) are becoming an interesting alternative, perhaps better suited as a source for lexical resources and language models for studies of dialogue andspontaneous speech. However, the high degree of spelling errors, irregularities and idiosyncrasies in the use of punctuation, white space and capitalisation require specialised tools. In this paper we study thedesign and implementation of a tool for pre-processing and normalisation of noisy corpora. We argue that rather than having separate tools for tokenisation, segmentation and spelling correction organisedin a pipeline, a unified tool is appropriate because of certain specific sorts of errors. We describe how a noisy channel model can be used at the character level to perform this. We describe how the sequenceof tokens needs to be divided into various types depending on their characteristics, and also how the modelling of white-space needs to be conditioned on the type of the preceding and following tokens.We use trainable stochastic transducers to model typographical errors, and other orthographic changes and a variety of sequence models for white space and the different sorts of tokens. We discuss thetraining of the models and various efficiency issues related to the decoding algorithm, and illustrate this with examples from a 100 million word corpus of Usenet news.},
ADDRESS = {Lancaster},
PDF = {papers/SprolacPaper.doc.pdf}
}
@INPROCEEDINGS{clark06e,
AUTHOR = {Alexander Clark and Remi Eyraud},
TITLE = {Learning Auxiliary Fronting with Grammatical Inference},
BOOKTITLE = {Proceedings of the 28th Annual meeting of the Cognitive Science Society},
YEAR = 2006,
ADDRESS = {Vancouver, Canada},
ABSTRACT = {We present a simple context-free grammatical inference algorithm,
and prove that it is capable of learning an interesting subclass of
context-free
languages. We also demonstrate that an implementation of this algorithm is
capable of learning auxiliary fronting in polar interrogatives (AFIPI) in English.
This has been
one of the most important test cases in language acquisition over the last few
decades.
We demonstrate that learning can proceed even in the complete absence of examples of particular constructions, and thus that debates about the frequency of occurrence of such constructions are irrelevant.
We discuss the implications of this on the type of innate learning
biases that must be hypothesized to explain first language acquisition.},
PDF = {papers/clarkCogsci.pdf},
NOTE = {This paper is closely related to the paper in CONLL, but is more directed to a cognitive science audience.}
}
@BOOK{Psychocomp:2005,
EDITOR = {William Gregory Sakas and Alexander Clark and James Cussens and Aris Xanthos},
TITLE = {Proceedings of the Workshop on Psychocomputational Models of Human Language Acquisition},
MONTH = {June},
YEAR = {2005},
ADDRESS = {Ann Arbor, Michigan},
PUBLISHER = {Association for Computational Linguistics}
}
@INPROCEEDINGS{clark05,
AUTHOR = {Clark, Alexander and Eyraud, Remi},
TITLE = {Identification in the Limit of Substitutable Context Free Languages},
BOOKTITLE = {Proceedings of The 16th International Conference on
Algorithmic Learning Theory},
PAGES = {283-296},
YEAR = 2005,
EDITOR = {Sanjay Jain and Hans Ulrich Simon and Etsuji Tomita},
PUBLISHER = {Springer-Verlag},
URL = {papers/alt2005published.pdf},
ABSTRACT = {This paper formalises the idea of substitutability introduced by
Zellig Harris in the 1950s and makes it the basis for a learning
algorithm from positive data only for a subclass of context-free
grammars. We show that there is a polynomial characteristic set,
and thus prove polynomial identification in the limit of this
class. We discuss the relationship of this class of languages to
other common classes discussed in grammatical inference. We also
discuss modifications to the algorithm that produces a reduction
system rather than a context-free grammar, that will be much more
compact. We discuss the relationship to Angluin's notion of
reversibility for regular languages.}
}
@INPROCEEDINGS{clark04c,
AUTHOR = {Clark, Alexander},
TITLE = {Grammatical Inference and First Language Acquisition},
BOOKTITLE = {Workshop on Psycho-computational Models of Human Language Acquisition},
YEAR = 2004,
ADDRESS = {Geneva, Switzerland},
MONTH = {August},
ABSTRACT = {One argument for parametric models of language has been learnability in the context of first language acquisition.
The claim is made that ``logical'' arguments from learnability theory require non-trivial constraints on the class of languages.
Initial formalisations of the problem (Gold, 1967) are however inapplicable to this particular situation.
In this paper we construct an appropriate formalisation of the problem using a modern vocabulary drawn from statistical learning theory and grammatical inference
and looking in detail at the relevant empirical facts.
We claim that a
variant of the Probably Approximately Correct (PAC) learning framework (Valiant, 984) with positive samples only, modified so it is not completely distribution free is the appropriate choice.
Some negative results derived from cryptographic problems appear to apply in this situation
but the existence of algorithms with provably good performance and subsequent work,
shows how these negative results are not as strong as they initially appear, and that recent algorithms for learning
regular languages partially satisfy our criteria.
We then discuss the applicability of these results to parametric and non-parametric models.},
URL = {papers/colingWorkshop.pdf}
}
@INPROCEEDINGS{clark04b,
AUTHOR = {Clark, Alexander and Thollard, Franck},
TITLE = {Partially Distribution-Free Learning of Regular Languages from Positive Samples},
BOOKTITLE = {Proceedings of COLING},
YEAR = 2004,
PAGES = {85--91},
ADDRESS = {Geneva, Switzerland},
URL = {papers/ClarkThollard154.pdf},
ABSTRACT = {Regular languages are widely used in NLP today in spite of their shortcomings.
Efficient algorithms that can reliably learn these languages, and which must in
realistic applications only
use positive samples, are necessary.
These languages are not learnable under traditional distribution free
criteria.
We claim that an appropriate learning framework is PAC learning where the distributions are constrained
to be generated by a class of stochastic automata with support equal to the target concept.
We discuss how this is related to other learning paradigms.
We then present a simple learning algorithm for regular languages,
and a self-contained proof that it learns according to this partially
distribution free criterion.}
}
@ARTICLE{clark04a,
AUTHOR = {Clark, Alexander and Thollard, Franck},
TITLE = {{PAC}-learnability of Probabilistic Deterministic Finite State Automata},
JOURNAL = {Journal of Machine Learning Research},
YEAR = 2004,
VOLUME = 5,
PAGES = {473-497},
MONTH = {May},
URL = {papers/clark04a.pdf},
ABSTRACT = {We study the learnability of Probabilistic Deterministic Finite State Automata under a modified PAC-learning criterion. We argue that it is necessary to add additional parameters to the sample complexity polynomial, namely a bound on the expected length of strings generated from any state, and a bound on the distinguishability between states. With this, we demonstrate that the class of PDFAs is PAC-learnable using a variant of a standard state-merging algorithm and the Kullback-Leibler divergence as error function.},
NOTEX = {There is an error in one of the bounds in the published paper. I am grateful to Satyaki Mahalanabis for pointing this out. Please email me for a correction, which gives a modified bound, and a slightly simplified proof.}
}
@UNPUBLISHED{clark03unpublished,
AUTHOR = {Clark, Alexander and Thollard, Franck},
TITLE = {PAC-learnability of Probabilistic Deterministic Finite-State Automata},
NOTE = {Manuscript},
YEAR = 2003
}
@INPROCEEDINGS{clark-01,
AUTHOR = {Clark, Alexander},
TITLE = {Learning Morphology with {Pair Hidden Markov Models}},
ABSTRACT = { In this paper I present a novel Machine Learning approach to the acquisition of stochastic string transductions based on
Pair Hidden Markov Models (PHMMs), a model used in computational biology.
I show how these models can be used to learn morphological processes in a variety of languages, including English, German and Arabic.
Previous techniques for learning morphology have been restricted to languages with essentially concatenative morphology.},
BOOKTITLE = {Proc. of the Student Workshop at the 39th Annual Meeting of the Association for Computational Linguistics},
YEAR = 2001,
ADDRESS = {Toulouse, France},
MONTH = {July},
PAGES = {55-60},
URL = {papers/eacl01.pdf},
PS = {papers/eacl01.ps}
}
@INPROCEEDINGS{clark-00,
AUTHOR = {Clark, Alexander},
TITLE = {Inducing Syntactic Categories by Context
Distribution Clustering},
PAGES = {91-94},
YEAR = {2000},
BOOKTITLE = {Proc. of Conference on Computational Natural Language Learning},
ADDRESS = {Lisbon, Portugal},
PDF = {papers/09194cla.pdf},
PS = {papers/09194cla.ps}
}
@INPROCEEDINGS{clark-01b,
AUTHOR = {Clark, Alexander},
TITLE = {Partially Supervised Learning of Morphology with Stochastic Transducers},
BOOKTITLE = {Proc. of Natural Language Processing Pacific Rim Symposium, NLPRS 2001},
YEAR = 2001,
ADDRESS = {Tokyo, Japan},
MONTH = {November},
PAGES = {341-348},
PDF = {papers/nlprs01.pdf}
}
@INPROCEEDINGS{clark-01a,
AUTHOR = {Clark, Alexander},
TITLE = {Unsupervised Induction of Stochastic Context Free Grammars with Distributional Clustering},
BOOKTITLE = {Proc. of Conference on Computational Natural Language Learning},
YEAR = 2001,
ADDRESS = {Toulouse, France},
MONTH = {July},
PAGES = {105-112},
PDF = {papers/conll01-final.pdf},
PS = {papers/conll01-final.ps}
}
@MASTERSTHESIS{clark-98,
TITLE = {Random Fields in NLP},
YEAR = {1998},
AUTHOR = {Clark, Alexander},
SCHOOL = {COGS, University of Sussex},
PS = {papers/field.ps.gz},
TYPE = {{M. Sc.} Dissertation},
NOTES = { This was an experiment (only partially successful) on the use of random fields to produce a statistical model over trees in the Susanne Corpus}
}
@PHDTHESIS{clark-01c,
AUTHOR = {Clark, Alexander},
TITLE = {Unsupervised Language Acquisition: Theory and Practice},
SCHOOL = {COGS, University of Sussex},
URL = {papers/thesis.pdf},
YEAR = 2001
}
@INPROCEEDINGS{clark-02,
AUTHOR = {Clark, Alexander},
TITLE = {Memory-Based Learning of Morphology with Stochastic Transducers},
BOOKTITLE = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL)},
YEAR = 2002,
PAGES = {513--520},
URL = {papers/acl02.pdf},
ABSTRACT = {This paper discusses the supervised learning of morphology using stochastic transducers,
trained using the Expectation-Maximization (EM) algorithm.
Two approaches are presented: first, using the transducers directly to model the process, and secondly using them to
define a similarity measure, related to the Fisher kernel method,
and then using a Memory-Based Learning (MBL) technique.
These are evaluated and compared on data sets from English, German, Slovene and Arabic.}
}
@INPROCEEDINGS{clark-03,
AUTHOR = {Clark, Alexander},
TITLE = {Combining Distributional and Morphological Information for Part of Speech Induction},
BOOKTITLE = {Proceedings of the tenth Annual Meeting of the {European Association for Computational Linguistics} {(EACL)}},
PAGES = {59--66},
URL = {papers/eacl2003.pdf},
PS = {papers/eacl03.ps},
YEAR = 2003
}
@INPROCEEDINGS{armstrong03,
AUTHOR = {Armstrong, S. and Clark, A. and Coray, G. and Georgescul, M. and Pallotta, V. and Popescu-Belis, A. and Portabella, D. and Rajman, M. and Starlander, M.},
TITLE = {Natural Language Queries on Natural Language Data: a Database of Meeting Dialogues},
BOOKTITLE = {8th International Conference on Applications of Natural Language to Information Systems},
ABSTRACT = {This paper describes an integrated system that enables the storage and retrieval of meeting transcripts (e.g. staff meetings). The system gives users who have not attended a meeting, or who want to review a particular point, enhanced access to an annotated version of the recorded data. This paper describes the various stages in the processing, storage and query of the data. First, we put forward the idea of shallow dialogue processing, in order to extract significant features of the meeting transcriptions for storage in a database, whose structure is briefly outlined. Low-level access to the database is provided as a Web service, which can be connected to several interfaces. A description of how multimodal input can be used with VoiceXML is also provided, thus offering an easy solution for voice and web based access to the dialogue data. The paper ends with considerations about the available data and its use in the current version of the system.},
YEAR = 2003,
ADDRESS = {Burg/Cottbus, Germany}
}
This file has been generated by bibtex2html 1.75