-
Notifications
You must be signed in to change notification settings - Fork 9
/
nlp.bib
570 lines (385 loc) · 19 KB
/
nlp.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
%% Bibliography for NLP (natural language processing), as applied to
%% software engineering tasks.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Criticisms of Tellina's accuracy
%%%
% Competition report; the abstract says it covers task, metrics, data, and solutions.
% NOTE(review): `month` holds a day range rather than a month macro (looks like
% an event date) -- confirm how it should pair with year 2021.
@InProceedings{NeurIPS-2020-NLC2CMD-Competition,
  author    = {Agarwal, Mayank and Chakraborti, Tathagata and Fu, Quchen and Gros, David and Lin, Xi Victoria and Maene, Jaron and Talamadupula, Kartik and Teng, Zhongwei and White, Jules},
  title     = {{NeurIPS} 2020 {NLC2CMD} Competition: Translating Natural Language to {Bash} Commands},
  booktitle = {Proceedings of the NeurIPS 2020 Competition and Demonstration Track},
  series    = {Proceedings of Machine Learning Research},
  volume    = {133},
  pages     = {302--324},
  month     = {06--12 Dec},
  year      = {2021},
  url       = {https://proceedings.mlr.press/v133/agarwal21b.html},
  pdf       = {http://proceedings.mlr.press/v133/agarwal21b/agarwal21b.pdf},
  abstract  = {The NLC2CMD Competition hosted at NeurIPS 2020 aimed to bring the power of natural language processing to the command line. Participants were tasked with building models that can transform descriptions of command line tasks in English to their Bash syntax. This is a report on the competition with details of the task, metrics, data, attempted solutions, and lessons learned.}
}
% Page range uses "--" (en dash), matching the other entries in this file.
@InProceedings{FuTWS2021,
  author    = {Fu, Quchen and Teng, Zhongwei and White, Jules and Schmidt, Douglas C.},
  title     = {A Transformer-based Approach for Translating Natural Language to {Bash} Commands},
  booktitle = {2021 20th IEEE International Conference on Machine Learning and Applications (ICMLA)},
  year      = 2021,
  pages     = {1245--1248},
}
% Page range uses "--" (en dash), matching the other entries in this file.
@InProceedings{ChenHLO2020,
  author    = {Chen, Yan and Herskovitz, Jaylin and Lasecki, Walter S. and Oney, Steve},
  title     = {Bashon: A Hybrid Crowd-Machine Workflow for Shell Command Synthesis},
  booktitle = {2020 IEEE Symposium on Visual Languages and Human-Centric Computing (VL/HCC)},
  year      = 2020,
  pages     = {1--8},
  doi       = {10.1109/VL/HCC50065.2020.9127248},
}
% NEEDpages is not a standard field; presumably a reminder that the page
% range still has to be filled in.
@InProceedings{ZhangLXTZLZ2022,
  author    = {Neng Zhang and Chao Liu and Xin Xia and Christoph Treude and Ying Zou and David Lo and Zibin Zheng},
  title     = {{ShellFusion}: Answer Generation for Shell Programming Tasks via Knowledge Fusion},
  crossref  = {ICSE2022},
  NEEDpages = {*},
}
% Page range uses "--" (en dash), matching the other entries in this file.
@InProceedings{KanCW2020,
  author    = {Kan, Jia-Wei and Chien, Wei-Chin and Wang, Sheng-De},
  title     = {Grid Structure Attention for Natural Language Interface to {Bash} Commands},
  booktitle = {2020 International Computer Symposium (ICS)},
  year      = 2020,
  pages     = {67--72},
  doi       = {10.1109/ICS51289.2020.00023},
}
% The title dash is written as "--" so the entry stays ASCII-only.
% Initials are separated ("N. S.") so abbreviating styles keep both.
@InProceedings{KumarNSAS2019,
  author    = {Kumar, N. S. and Nagalakshmi, Malathy and Sharma, Tanya and Ambati, Sai Bhavana and Satyanarayana, Vibha},
  title     = {Natural Language Interface to {Linux} Shell -- Report},
  booktitle = {2019 3rd International Conference on Computing and Communications Technologies (ICCCT)},
  year      = 2019,
  pages     = {24--30},
  doi       = {10.1109/ICCCT2.2019.8824800},
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Participants in NLC2CMD competition
%%%
% "{\L}ukasz" is braced so BibTeX treats the accented letter as part of one
% given name instead of splitting "\L ukasz" into two name tokens.
% NEEDpages is not a standard field; presumably a reminder that pages are missing.
@inproceedings{VaswaniSPUJGKP2017,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is All you Need},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {30},
  NEEDpages = {},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
}
@InProceedings{TangMRS2018,
  author    = {Gongbo Tang and Mathias M{\"{u}}ller and Annette Rios and Rico Sennrich},
  title     = {Why Self-Attention? {A} Targeted Evaluation of Neural Machine Translation Architectures},
  booktitle = {2018 Conference on Empirical Methods in Natural Language Processing},
  address   = {Brussels, Belgium},
  year      = 2018,
}
% Undergraduate honors thesis, kept as @Misc with the thesis details in `note`.
% NOTE(review): the URL path was "ai-labpub-view.php"; a "/" was restored
% between "ai-lab" and "pub-view.php" -- confirm the link resolves.
@Misc{Gros2019,
  author = {David Gros},
  title  = {{AInix}: An Open Platform for Natural Language Interfaces to Shell Commands},
  month  = may,
  year   = 2019,
  note   = {Undergraduate Honors Thesis, Computer Science Department, University of Texas at Austin},
  url    = {http://www.cs.utexas.edu/users/ai-lab/pub-view.php?PubID=127814},
}
@TechReport{RadfordWCLAS2019,
  author      = {Alec Radford and Jeffrey Wu and Rewon Child and David Luan and Dario Amodei and Ilya Sutskever},
  title       = {Language models are unsupervised multitask learners},
  institution = {OpenAI},
  year        = 2019,
  url         = {http://www.persagen.com/files/misc/radford2019language.pdf},
}
% The key spells the first author "Linvinov" while the author field has
% "Litvinov"; the key is left as-is so existing \cite commands keep resolving.
@TechReport{LinvinovMPKO2020,
  author      = {Denis Litvinov and Gleb Morgachev and Artem Popov and Nikolai Korolev and Dmitrii Orekhov},
  title       = {{NLC2CMD} Report from {JB} Team},
  institution = {JetBrains},
  month       = dec,
  year        = 2020,
  url         = {https://github.com/JetBrains/nlc2cmd/blob/master/report.pdf},
}
@Misc{KangY2020,
  author = {Sungmin Kang and Juyeon Yoon},
  title  = {Hierarchical Decoding of {Bash} Commands},
  note   = {Talk at NeurIPS 2020},
  year   = 2020,
  url    = {https://slideslive.com/38942503/hierarchical-decoder-for-bash-commands},
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% NLP to command line (bash) tools
%%%
% Middle initial written "Jared D." so non-abbreviating styles print the period.
@inproceedings{NEURIPS2020_1457c0d6,
  author    = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D. and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
  title     = {Language Models are Few-Shot Learners},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS 2020)},
  volume    = {33},
  pages     = {1877--1901},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf},
}
% `doi` holds the bare DOI (no resolver prefix) so styles can build the link
% themselves; the issue number is a plain integer and pages use "--".
@Article{LiWYT2019,
  author  = {Hao Li and Yu-Ping Wang and Jie Yin and Gang Tan},
  title   = {{SmartShell}: Automated Shell Scripts Synthesis from Natural Language},
  journal = {International Journal of Software Engineering and Knowledge Engineering},
  year    = 2019,
  volume  = 29,
  number  = 2,
  pages   = {197--220},
  doi     = {10.1142/S0218194019500098},
}
@InProceedings{CLAI-NeurIPS2019-demonstration,
  author    = {Mayank Agarwal and Jorge Barroso Carmona and Tathagata Chakraborti and Eli M. Dow and Kshitij P. Fadnis and Borja Godoy and Kartik Talamadupula},
  title     = {Project {CLAI} --- Bringing {AI} to the Command Line Interface},
  booktitle = {NeurIPS 2019 Demonstration Track},
  year      = 2019,
}
% Acronyms in the title are braced so sentence-casing styles do not lowercase them.
% NOTE(review): `journal` looks like an auto-exported arXiv category, and this
% may be an earlier version of the preprint cited as CLAI-arxiv-2002.00762 -- confirm.
@article{Agarwal2020ProjectCI,
  author  = {Mayank Agarwal and Jorge J. Barroso and Tathagata Chakraborti and Eli M. Dow and Kshitij P. Fadnis and Borja Godoy and Madhavan Pallan and Kartik Talamadupula},
  title   = {Project {CLAI}: Instrumenting the Command Line as a New Environment for {AI} Agents},
  journal = {arXiv: Human-Computer Interaction},
  year    = {2020},
}
% arXiv preprint; `note` records the version (v2).
@Misc{CLAI-arxiv-2002.00762,
  author       = {Mayank Agarwal and Jorge J. Barroso and Tathagata Chakraborti and Eli M. Dow and Kshitij P. Fadnis and Borja Godoy and Kartik Talamadupula},
  title        = {{CLAI:} {A} Platform for {AI} Skills on the Command Line},
  howpublished = {https://arxiv.org/abs/2002.00762},
  url          = {https://arxiv.org/abs/2002.00762},
  note         = {v2},
  month        = jun,
  year         = 2020,
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% NLP to test assertion tools
%%%
% Page range uses "--" (en dash), matching the other entries in this file.
@InProceedings{DinellaRML2022,
  author   = {Dinella, Elizabeth and Ryan, Gabriel and Mytkowicz, Todd and Lahiri, Shuvendu K.},
  title    = {{TOGA}: a neural method for test oracle generation},
  crossref = {ICSE2022},
  pages    = {2130--2141},
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Models of code
%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% LLMs
%%%
% User study of Copilot with 24 participants (per the abstract).
@inproceedings{10.1145/3491101.3519665,
  author    = {Vaithilingam, Priyan and Zhang, Tianyi and Glassman, Elena L.},
  title     = {Expectation vs.\ Experience: Evaluating the Usability of Code Generation Tools Powered by Large Language Models},
  booktitle = {Extended Abstracts of the 2022 CHI Conference on Human Factors in Computing Systems},
  series    = {CHI EA '22},
  location  = {New Orleans, LA, USA},
  articleno = {332},
  numpages  = {7},
  year      = {2022},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450391566},
  doi       = {10.1145/3491101.3519665},
  url       = {https://doi.org/10.1145/3491101.3519665},
  keywords  = {github copilot, large language model},
  abstract  = {Recent advances in Large Language Models (LLM) have made automatic code generation possible for real-world programming tasks in general-purpose programming languages such as Python. However, there are few human studies on the usability of these tools and how they fit the programming workflow. In this work, we conducted a within-subjects user study with 24 participants to understand how programmers use and perceive Copilot, a LLM-based code generation tool. We found that, while Copilot did not necessarily improve the task completion time or success rate, most participants preferred to use Copilot in daily programming tasks, since Copilot often provided a useful starting point and saved the effort of searching online. However, participants did face difficulties in understanding, editing, and debugging code snippets generated by Copilot, which significantly hindered their task-solving effectiveness. Finally, we highlighted several promising directions for improving the design of Copilot based on our observations and participants’ feedback.}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Other
%%%
@InProceedings{TanYKZ2007,
  author   = {Tan, Lin and Yuan, Ding and Krishna, Gopal and Zhou, Yuanyuan},
  title    = {/*{iComment}: Bugs or Bad Comments?*/},
  crossref = {SOSP2007},
  pages    = {145--158},
}
@InProceedings{TanZP2011,
  author   = {Tan, Lin and Zhou, Yuanyuan and Padioleau, Yoann},
  title    = {{aComment}: Mining annotations from comments and code to detect interrupt related concurrency bugs},
  crossref = {ICSE2011},
  pages    = {11--20},
}
% NEEDpages is not a standard field; presumably a reminder that the page
% range still has to be filled in.
@InProceedings{YeSMBL2016,
  author    = {Xin Ye and Hui Shen and Xiao Ma and Razvan Bunescu and Chang Liu},
  title     = {From word embeddings to document similarities for improved information retrieval in software engineering},
  crossref  = {ICSE2016},
  NEEDpages = {*},
}
@InProceedings{AllamanisBBS2014,
  author   = {Allamanis, Miltiadis and Barr, Earl T. and Bird, Christian and Sutton, Charles},
  title    = {Learning natural coding conventions},
  crossref = {FSE2014},
  pages    = {281--293},
}
@InProceedings{PanditaXZXOP2012,
  author   = {Pandita, Rahul and Xiao, Xusheng and Zhong, Hao and Xie, Tao and Oney, Stephen and Paradkar, Amit},
  title    = {Inferring method specifications from natural language {API} descriptions},
  crossref = {ICSE2012},
  pages    = {815--825},
}
@InProceedings{HindleBSGD2012,
  author   = {Hindle, Abram and Barr, Earl T. and Su, Zhendong and Gabel, Mark and Devanbu, Premkumar},
  title    = {On the Naturalness of Software},
  crossref = {ICSE2012},
  pages    = {837--847},
}
@InProceedings{HowardGPVS2013,
  author   = {Howard, Matthew J. and Gupta, Samir and Pollock, Lori and Vijay-Shanker, K.},
  title    = {Automatically mining software-based, semantically-similar words from comment-code mappings},
  crossref = {MSR2013},
  pages    = {377--386},
}
@InProceedings{GuptaMPVS2013,
  author   = {Samir Gupta and Sana Malik and Lori Pollock and K. Vijay-Shanker},
  title    = {Part-of-speech tagging of program identifiers for improved text-based software engineering tools},
  crossref = {ICPC2013},
  pages    = {3--12},
}
@InProceedings{SridharaHMPVS2010,
  author   = {Sridhara, Giriprasad and Hill, Emily and Muppaneni, Divya and Pollock, Lori and Vijay-Shanker, K.},
  title    = {Towards automatically generating summary comments for {Java} methods},
  crossref = {ASE2010},
  pages    = {43--52},
}
@InProceedings{HillFBSNPV2008,
  author   = {Hill, Emily and Fry, Zachary P. and Boyd, Haley and Sridhara, Giriprasad and Novikova, Yana and Pollock, Lori and Vijay-Shanker, K.},
  title    = {{AMAP}: Automatically mining abbreviation expansions in programs to enhance software maintenance tools},
  crossref = {MSR2008},
  pages    = {79--88},
}
% Diacritics in the fourth author's name are restored with braced LaTeX
% accents so classic BibTeX sorts and labels them as single letters.
@InProceedings{ArnaoudovaEOGA2010,
  author   = {Arnaoudova, Venera and Eshkevari, Laleh and Oliveto, Rocco and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
  title    = {Physical and conceptual identifier dispersion: Measures and relation to fault proneness},
  crossref = {ICSM2010},
  pages    = {1--5},
}
% Study with over 100 programmers comparing single-letter, abbreviated, and
% full-word identifiers (per the abstract).
@Article{LawrieMFB2007,
  author   = {Lawrie, Dawn and Morrell, Christopher and Feild, Henry and Binkley, David},
  title    = {Effective identifier names for comprehension and memory},
  journal  = {Innovations in Systems and Software Engineering},
  volume   = 3,
  number   = 4,
  pages    = {303--318},
  month    = dec,
  year     = 2007,
  abstract =
    {Readers of programs have two main sources of domain information:
    identifier names and comments. When functions are uncommented, as many are,
    comprehension is almost exclusively dependent on the identifier
    names. Assuming that writers of programs want to create quality identifiers
    (e.g., identifiers that include relevant domain knowledge), one must ask
    how should they go about it. For example, do the initials of a concept name
    provide enough information to represent the concept? If not, and a longer
    identifier is needed, is an abbreviation satisfactory or does the concept
    need to be captured in an identifier that includes full words? What is the
    effect of longer identifiers on limited short term memory capacity? Results
    from a study designed to investigate these questions are reported. The
    study involved over 100 programmers who were asked to describe 12 different
    functions and then recall identifiers that appeared in each function. The
    functions used three different levels of identifiers: single letters,
    abbreviations, and full words. Responses allow the extent of comprehension
    associated with the different levels to be studied along with their impact
    on memory. The functions used in the study include standard computer
    science textbook algorithms and functions extracted from production
    code. The results show that full-word identifiers lead to the best
    comprehension; however, in many cases, there is no statistical difference
    between using full words and abbreviations. When considered in the light of
    limited human short-term memory, well-chosen abbreviations may be
    preferable in some situations since identifiers with fewer syllables are
    easier to remember.},
}
@Article{DeissenboeckP2006,
  author  = {Deissenboeck, Florian and Pizka, Markus},
  title   = {Concise and consistent naming},
  journal = {Software Quality Journal},
  volume  = 14,
  number  = 3,
  pages   = {261--282},
  month   = sep,
  year    = 2006,
}
@InProceedings{MihalceaCS2006,
  author   = {Mihalcea, Rada and Corley, Courtney and Strapparava, Carlo},
  title    = {Corpus-based and knowledge-based measures of text semantic similarity},
  crossref = {AAAI2006},
  pages    = {775--780},
}
% Page range uses "--" (en dash), matching the other entries in this file.
@InProceedings{LawrieMB2010,
  author   = {Dawn Lawrie and Christopher Morrell and Dave Binkley},
  title    = {Normalizing source code vocabulary},
  crossref = {WCRE2010},
  pages    = {3--12},
}
@InProceedings{MotwaniBrun2019,
  author   = {Motwani, Manish and Brun, Yuriy},
  title    = {Automatically Generating Precise Oracles from Structured Natural Language Specifications},
  crossref = {ICSE2019},
  pages    = {188--199},
}
@InProceedings{HuLXLJ2018,
  author   = {Hu, Xing and Li, Ge and Xia, Xin and Lo, David and Jin, Zhi},
  title    = {Deep code comment generation},
  crossref = {ICPC2018},
  pages    = {200--210},
}
% arXiv preprint; the link is carried in `howpublished`.
@Misc{LouisDBS2018,
  author       = {Annie Louis and Santanu Kumar Dash and Earl T. Barr and Charles Sutton},
  title        = {Deep Learning to Detect Redundant Method Comments},
  howpublished = {\url{http://arxiv.org/abs/1806.04616}},
  month        = jun,
  year         = 2018,
}
@InProceedings{MikolovSCCD2013,
  author   = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
  title    = {Distributed representations of words and phrases and their compositionality},
  crossref = {NIPS2013},
  pages    = {3111--3119},
}
@InProceedings{MovshovitzAttiasC2013,
  author   = {Movshovitz-Attias, Dana and Cohen, William W.},
  title    = {Natural language models for predicting programming comments},
  crossref = {ACL2013short},
  pages    = {35--40},
}
% The key spells the first author "Buze" while the author field has "Buse";
% the key is left as-is so existing \cite commands keep resolving.
% Initials are separated ("P. L.") and the page range uses "--".
@InProceedings{BuzeW2010,
  author   = {Buse, Raymond P. L. and Weimer, Westley R.},
  title    = {Automatically documenting program changes},
  crossref = {ASE2010},
  pages    = {33--42},
}
% `supersededby` is not a standard field; here it names the key of the later
% journal article. Page range uses "--".
@InProceedings{PascarellaB2017,
  author       = {Pascarella, Luca and Bacchelli, Alberto},
  title        = {Classifying code comments in {Java} open-source software systems},
  crossref     = {MSR2017},
  pages        = {227--237},
  supersededby = {PascarellaBB2019},
}
% `journal` is the @string macro JEmpiricalSE, which is not defined in this
% file and must be supplied elsewhere. Page range uses "--".
@Article{PascarellaBB2019,
  author  = {Pascarella, Luca and Bruntink, Magiel and Bacchelli, Alberto},
  title   = {Classifying code comments in {Java} software systems},
  journal = JEmpiricalSE,
  year    = 2019,
  volume  = 24,
  number  = 3,
  pages   = {1499--1537},
  month   = jun,
}
@InProceedings{DevlinCLT2019,
  author   = {Jacob Devlin and Ming-Wei Chang and Kenton Lee and Kristina Toutanova},
  title    = {{BERT}: Pre-training of deep bidirectional transformers for language understanding},
  crossref = {NAACL-HLT2019},
  pages    = {4171--4186},
}
% LocalWords: InProceedings TanYKZ2007 Gopal Zhou Yuanyuan iComment Iyer pdf
% LocalWords: booktitle SOSP2007 SOSP2007date SOSP2007addr Benwen NN pre url
% LocalWords: testEntrySetClearChangesMap Srinivasan Ioannis Konstas Xin
% LocalWords: testSettingHeightThatIsTooSmallLeavesHeightUnchanged LSTM
% LocalWords: Zettlemoyer YeSMBL2016 Shen Xiao Razvan Bunescu Liu MRR Za
% LocalWords: ICSE2016 NEEDpages ICSE2016date ICSE2016addr Wiki LSA CCG
% LocalWords: stemmer Kushman Barzilay Turkers regex regexes Mise Kiddon
% LocalWords: Ganesa Thandavam Ponnuraj Yejin Choi Branavan Miltiadis xj
% LocalWords: Allamanis AAAI Briand Briand's Hirschberg uncompelling xk
% LocalWords: Movshovitz Attias ICPC preprocess pickaxe xl Convolutional
% LocalWords: Hao Peng ie camelcase tokenizer Naturalize's NLC2CMD
% LocalWords: Agarwal Mayank Chakraborti Tathagata Fu Quchen Gros
% LocalWords: Maene Jaron Talamadupula Kartik Teng Zhongwei