-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwradford.bib
268 lines (249 loc) · 14.1 KB
/
wradford.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
@inproceedings{Estival-PACLING07,
  author    = {Dominique Estival and Tanja Gaustad and Son Bao Pham and Ben Hutchinson and Will Radford},
  title     = {Author Profiling for {English} Emails},
  booktitle = {Proceedings of the 10th Conference of the Pacific Association for Computational Linguistics},
  year      = {2007},
  month     = sep,
  pages     = {263--272},
  publisher = {PACLing},
  url       = {http://mandrake.csse.unimelb.edu.au/pacling2007/files/final/51/51_Paper_meta.pdf}
}
@inproceedings{Estival-ALTW07,
  author    = {Dominique Estival and Tanja Gaustad and Son Bao Pham and Ben Hutchinson and Will Radford},
  title     = {{TAT}: an author profiling tool with application to {Arabic} emails},
  booktitle = {Proceedings of the Australasian Language Technology Workshop 2007},
  year      = {2007},
  month     = dec,
  pages     = {21--30},
  publisher = {ALTA},
  url       = {http://aclweb.org/anthology-new/U/U07/U07-1006.pdf}
}
@inproceedings{RadfordEtAl-ALTW09,
  author    = {Will Radford and Ben Hachey and James R. Curran and Maria Milosavljevic},
  title     = {Tracking Information Flow in Financial Text},
  booktitle = {Proceedings of the Australasian Language Technology Association Workshop 2009},
  year      = {2009},
  month     = dec,
  pages     = {11--19},
  address   = {Sydney, Australia},
  url       = {http://aclweb.org/anthology-new/U/U09/U09-1003.pdf}
}
@techreport{radford:honsthesis,
  author      = {Will Radford},
  title       = {Tracking Information Flow in Finance Text},
  type        = {Honours thesis},
  institution = {University of Sydney},
  address     = {Sydney, Australia},
  year        = {2009},
  abbr        = {Honours thesis},
  myurl       = {radford09honsthesis},
  url         = {http://sydney.edu.au/engineering/it/~wradford/pubs/wradford-hons-thesis.pdf},
  keywords    = {Information Flow, Finance Text, News Text}
}
@inproceedings{radford:hsm10,
  author    = {Will Radford and Ben Hachey and James R. Curran and Maria Milosavljevic},
  title     = {Tracking Information Flow between Primary and Secondary News Sources},
  booktitle = {Proceedings of the NAACL HLT 2010 Workshop on Computational Linguistics in a World of Social Media},
  year      = {2010},
  month     = jun,
  address   = {Los Angeles, CA USA},
  pages     = {29--30},
  myurl     = {socmed10iflow},
  abbr      = {SocMed},
  keywords  = {Information Flow},
  anthology = {W/W10/W10-0515},
  url       = {http://www.aclweb.org/anthology/W10-0515.pdf},
  abstract  = {Tracking information flow (IFLOW) is crucial to understanding the evolution of news stories. We present analysis and experiments for IFLOW between company announcements and newswire. Error analysis shows that many FPs are annotation errors and many FNs are due to coarse-grained document-level modelling. Experiments show that document meta-data features (e.g., category, length, timing) improve f-scores relative to upper bound by 23\%.}
}
@incollection{milosavljevic:ucs10,
  author      = {Maria Milosavljevic and Jean-Yves Delort and Ben Hachey and Bavani Arunasalam and Will Radford and James R. Curran},
  affiliation = {Capital Markets CRC Limited 55 Harrington Street Sydney NSW 2000 Australia},
  title       = {Automating Financial Surveillance},
  booktitle   = {User Centric Media (LNCS)},
  series      = {Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering},
  editor      = {Akan, Ozgur and Bellavista, Paolo and Cao, Jiannong and Dressler, Falko and Ferrari, Domenico and Gerla, Mario and Kobayashi, Hisashi and Palazzo, Sergio and Sahni, Sartaj and Shen, Xuemin (Sherman) and Stan, Mircea and Xiaohua, Jia and Zomaya, Albert and Coulson, Geoffrey and Daras, Petros and Ibarra, Oscar Mayora},
  publisher   = {Springer Berlin Heidelberg},
  isbn        = {978-3-642-12630-7},
  keywords    = {Computer Science},
  volume      = {40},
  pages       = {305--311},
  doi         = {10.1007/978-3-642-12630-7_38},
  url         = {http://dx.doi.org/10.1007/978-3-642-12630-7_38},
  note        = {10.1007/978-3-642-12630-7\_38},
  year        = {2010}
}
@inproceedings{radford:tac10,
  author    = {Will Radford and Ben Hachey and Joel Nothman and Matthew Honnibal and James R. Curran},
  title     = {Document-level Entity Linking: {CMCRC} at {TAC} 2010},
  booktitle = {Proceedings of the Text Analysis Conference},
  year      = {2010},
  month     = nov,
  address   = {Gaithersburg, MD USA},
  publisher = {National Institute of Standards and Technology},
  keywords  = {Information Extraction,Wikipedia},
  url       = {http://www.nist.gov/tac/publications/2010/participant.papers/CMCRC.proceedings.pdf},
  myurl     = {tac10sysdesc},
  abbr      = {TAC},
  abstract  = {This paper describes the CMCRC systems entered in the TAC 2010 entity linking challenge. The best performing system we describe implements the document-level entity linking system from Cucerzan (2007), with several additions that exploit global information. Our implementation of Cucerzan's method achieved a score of 74.9\% in development experiments. Additional global information improves performance to 78.4\%. On the TAC 2010 test data, our best system achieves a score of 84.4\%, which is second in the overall rankings of submitted systems.}
}
@inproceedings{hachey:wise11,
  author       = {Ben Hachey and Will Radford and James R. Curran},
  title        = {Graph-based Named Entity Linking with {W}ikipedia},
  booktitle    = {Proceedings of the 12th International Conference on Web Information System Engineering},
  year         = {2011},
  address      = {Sydney, NSW Australia},
  publisher    = {Springer},
  abbr         = {WISE},
  keywords     = {Information Extraction,Wikipedia},
  myurl        = {wise11graphlink},
  url          = {http://www.springer.com/computer/database+management+%26+information+retrieval/book/978-3-642-24433-9},
  url_preprint = {http://web.science.mq.edu.au/~bhachey/pubs/hachey-wise11-graph-preprint.pdf},
  abstract     = {Named entity linking (NEL) grounds entity mentions to their corresponding Wikipedia article. State-of-the-art supervised NEL systems use features over the rich Wikipedia document and link-graph structure. Graph-based measures have been effective over WordNet for word sense disambiguation (WSD). We draw parallels between NEL and WSD, motivating our unsupervised NEL approach that exploits the Wikipedia article and category link graphs. Our system achieves 85.5\% accuracy on the TAC 2010 shared task - competitive with the best supervised and unsupervised systems.}
}
@inproceedings{radford:tac11,
  author    = {Will Radford and Ben Hachey and Matthew Honnibal and Joel Nothman and James R. Curran},
  title     = {Naive but effective {NIL} clustering baselines -- {CMCRC} at {TAC} 2011},
  booktitle = {Proceedings of the Text Analysis Conference},
  year      = {2011},
  month     = nov,
  address   = {Gaithersburg, MD USA},
  publisher = {National Institute of Standards and Technology},
  keywords  = {Information Extraction,Wikipedia},
  myurl     = {tac11sysdesc},
  abbr      = {TAC},
  url       = {http://sydney.edu.au/engineering/it/~wradford/pubs/tac11sysdesc.pdf},
  abstract  = {This paper describes the CMCRC systems entered in the TAC2011 entity linking challenge. We used our best-performing system from TAC2010 to link queries, then clustered NIL links. We focused on naive baselines that group by attributes of the top entity candidate. All three systems performed strongly at 75.4\% B3 F1, above the 71.6\% median score.}
}
@inproceedings{radford:tac12,
  author    = {Will Radford and Will Cannings and Andrew Naoum and Joel Nothman and Glen Pink and Daniel Tse and James R. Curran},
  title     = {({A}lmost) {T}otal {R}ecall -- {SYDNEY\_CMCRC} at {TAC} 2012},
  booktitle = {Proceedings of the Text Analysis Conference},
  year      = {2012},
  month     = nov,
  address   = {Gaithersburg, MD USA},
  publisher = {National Institute of Standards and Technology},
  keywords  = {Information Extraction,Wikipedia},
  myurl     = {tac12sysdesc},
  abbr      = {TAC},
  web       = {http://www.it.usyd.edu.au/~wradford/pubs/tac12sysdesc.pdf},
  abstract  = {We explore unsupervised and supervised whole-document approaches to English NEL with naive and context clustering. Our best system uses unsupervised entity linking and naive clustering and scores 66.5\% B3+ F1 score. Our KB clustering score is competitive with the top systems at 65.6\%.}
}
@article{hachey:aij10,
  author    = {Ben Hachey and Will Radford and Joel Nothman and Matthew Honnibal and James R. Curran},
  title     = {Evaluating Entity Linking with {Wikipedia}},
  journal   = {Artificial Intelligence},
  abbr      = {AI Journal},
  publisher = {Elsevier},
  year      = {2013},
  month     = jan,
  volume    = {194},
  pages     = {130--150},
  doi       = {10.1016/j.artint.2012.04.005},
  myurl     = {aij10nelinking},
  web       = {http://dx.doi.org/10.1016/j.artint.2012.04.005},
  keywords  = {Information Extraction,Wikipedia},
  abstract  = {Named Entity Linking (NEL) grounds entity mentions to their corresponding node in a Knowledge Base (KB). Recently, a number of systems have been proposed for linking entity mentions in text to Wikipedia pages. Such systems typically search for candidate entities and then disambiguate them, returning either the best candidate or NIL. However, comparison has focused on disambiguation accuracy, making it difficult to determine how search impacts performance. Furthermore, important approaches from the literature have not been systematically compared on standard data sets. We reimplement three seminal NEL systems and present a detailed evaluation of search strategies. Our experiments find that coreference and acronym handling leads to substantial improvement, and search strategies account for much of the variation between systems. This is an interesting finding, because these aspects of the problem have often been neglected in the literature, which has focused largely on complex candidate ranking algorithms.}
}
@article{nothman:aij10,
  author      = {Joel Nothman and Nicky Ringland and Will Radford and Tara Murphy and James R. Curran},
  title       = {Learning multilingual named entity recognition from {Wikipedia}},
  journal     = {Artificial Intelligence},
  abbr        = {AI Journal},
  publisher   = {Elsevier},
  year        = {2013},
  month       = jan,
  volume      = {194},
  pages       = {151--175},
  doi         = {10.1016/j.artint.2012.03.006},
  web         = {http://dx.doi.org/10.1016/j.artint.2012.03.006},
  myurl       = {aij10wikiner},
  myresources = {WikiNER},
  keywords    = {Information Extraction,Wikipedia},
  abstract    = {We automatically create enormous, free and multilingual ``silver''-standard training annotations for named entity recognition (NER) by exploiting the text and structure of Wikipedia. Most NER systems rely on statistical models of annotated data to identify and classify names of people, locations and organisations in text. This dependence on expensive annotation is the knowledge bottleneck our work overcomes.
We first classify each Wikipedia article into named entity (NE) types, training and evaluating on 7,200 manually-labelled Wikipedia articles across nine languages. Our cross-lingual approach achieves up to 95\% accuracy.
We transform the links between articles into NE annotations by projecting the target article's classifications onto the anchor text. This approach yields reasonable annotations, but does not immediately compete with existing gold-standard data. By inferring additional links and heuristically tweaking the Wikipedia corpora, we better align our automatic annotations to gold standards.
We annotate millions of words in nine languages, evaluating English, German, Spanish, Dutch and Russian Wikipedia-trained models against CoNLL Shared Task data and other gold-standard corpora. Our approach outperforms other approaches to automatic NE annotation (Richman08,Mika08); competes with gold-standard training when tested on an evaluation corpus from a different source; and performs 10\% better than newswire-trained models on manually-annotated Wikipedia text.}
}
@inproceedings{radford-curran:2013:Short,
  author    = {Will Radford and James R. Curran},
  title     = {Joint Apposition Extraction with Syntactic and Semantic Constraints},
  booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  year      = {2013},
  month     = aug,
  address   = {Sofia, Bulgaria},
  publisher = {Association for Computational Linguistics},
  pages     = {671--677},
  keywords  = {Information Extraction,Apposition},
  url       = {http://www.aclweb.org/anthology/P13-2118},
  myurl     = {acl13shortappos},
  abstract  = {Appositions are adjacent NPs used to add information to a discourse. We propose systems exploiting syntactic and semantic constraints to extract appositions from OntoNotes. Our joint log-linear model outperforms the state-of-the-art Favre and Hakkani-Tur (2009) model by $\sim$10\% on Broadcast News, and achieves 54.3\% F-score on multiple genres.}
}
@inproceedings{pink:tac13,
  author    = {Glen Pink and Will Radford and Will Cannings and Andrew Naoum and Joel Nothman and Daniel Tse and James R. Curran},
  title     = {{SYDNEY\_CMCRC} at {TAC} 2013},
  booktitle = {Proceedings of the Text Analysis Conference (to appear)},
  year      = {2013},
  month     = nov,
  address   = {Gaithersburg, MD USA},
  publisher = {National Institute of Standards and Technology},
  keywords  = {Information Extraction,Wikipedia},
  myurl     = {tac13sysdesc},
  abbr      = {TAC},
  web       = {http://www.it.usyd.edu.au/~gpin7031/pubs/tac13sysdesc.pdf},
  abstract  = {We use a supervised whole-document approach to English Entity Linking with simple clustering approaches. The system extends our TAC 2012 system, introducing new features for modelling local entity description and type-specific matching as well type-specific supervised models and supervised NIL classification. Our rule-based clustering takes advantage of local description and topics to split NIL clusters. The best system uses supervised entity linking and local description type clustering and scores 70.5\% B3+ F1 score. Our KB clustering score is competitive with the top system at 72.1\%.}
}
@inproceedings{hachey-nothman-radford:2014:P14-2,
  author    = {Hachey, Ben and Nothman, Joel and Radford, Will},
  title     = {Cheap and easy entity evaluation},
  booktitle = {Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  year      = {2014},
  month     = jun,
  address   = {Baltimore, Maryland},
  publisher = {Association for Computational Linguistics},
  pages     = {464--469},
  url       = {http://www.aclweb.org/anthology/P14-2076}
}