-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreferences.bib
1080 lines (1015 loc) · 43.4 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@article{Fukushima1980,
  author    = {Fukushima, Kunihiko},
  title     = {Neocognitron: A self-organizing neural network model for a mechanism of pattern recognition unaffected by shift in position},
  journal   = {Biological Cybernetics},
  publisher = {Springer Science and Business Media {LLC}},
  volume    = {36},
  number    = {4},
  pages     = {193--202},
  year      = {1980},
  month     = apr,
  doi       = {10.1007/bf00344251},
  url       = {https://doi.org/10.1007/bf00344251}
}
@article{726791,
  author  = {LeCun, Y. and Bottou, L. and Bengio, Y. and Haffner, P.},
  title   = {Gradient-based learning applied to document recognition},
  journal = {Proceedings of the {IEEE}},
  year    = {1998},
  volume  = {86},
  number  = {11},
  pages   = {2278--2324},
  doi     = {10.1109/5.726791}
}
@article{6795724,
  author  = {LeCun, Y. and Boser, B. and Denker, J. S. and Henderson, D. and Howard, R. E. and Hubbard, W. and Jackel, L. D.},
  title   = {Backpropagation Applied to Handwritten Zip Code Recognition},
  journal = {Neural Computation},
  year    = {1989},
  volume  = {1},
  number  = {4},
  pages   = {541--551},
  doi     = {10.1162/neco.1989.1.4.541}
}
% Book homepage is stored in the url field (raw, no \url wrapper), matching the other entries in this file.
@book{Goodfellow-et-al-2016,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  url       = {http://www.deeplearningbook.org}
}
@book{Arndt2011,
  author    = {Arndt, J{\"o}rg},
  title     = {Matters Computational},
  publisher = {Springer Berlin Heidelberg},
  year      = {2011},
  doi       = {10.1007/978-3-642-14764-7},
  url       = {https://doi.org/10.1007/978-3-642-14764-7}
}
% Conference paper: the venue goes in booktitle, and the "vol.2" suffix from the pages field is stored as volume.
@inproceedings{Zhou1988ComputationOO,
  author    = {Zhou, Yi-Tong and Chellappa, Rama},
  title     = {Computation of optical flow using a neural network},
  booktitle = {{IEEE} 1988 International Conference on Neural Networks},
  year      = {1988},
  volume    = {2},
  pages     = {71--78},
  url       = {https://api.semanticscholar.org/CorpusID:7292956}
}
@misc{lin2014network,
  author        = {Lin, Min and Chen, Qiang and Yan, Shuicheng},
  title         = {Network In Network},
  year          = {2014},
  eprint        = {1312.4400},
  archivePrefix = {arXiv},
  primaryClass  = {cs.NE}
}
@article{Zou2023,
  author    = {Zou, Zhengxia and Chen, Keyan and Shi, Zhenwei and Guo, Yuhong and Ye, Jieping},
  title     = {Object Detection in 20 Years: A Survey},
  journal   = {Proceedings of the {IEEE}},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  volume    = {111},
  number    = {3},
  pages     = {257--276},
  year      = {2023},
  month     = mar,
  doi       = {10.1109/jproc.2023.3238524},
  url       = {https://doi.org/10.1109/jproc.2023.3238524}
}
@article{Zaidi2022,
  author    = {Zaidi, Syed Sahil Abbas and Ansari, Mohammad Samar and Aslam, Asra and Kanwal, Nadia and Asghar, Mamoona and Lee, Brian},
  title     = {A survey of modern deep learning based object detection models},
  journal   = {Digital Signal Processing},
  publisher = {Elsevier {BV}},
  volume    = {126},
  pages     = {103514},
  year      = {2022},
  month     = jun,
  doi       = {10.1016/j.dsp.2022.103514},
  url       = {https://doi.org/10.1016/j.dsp.2022.103514}
}
@article{Viola2004,
  author    = {Viola, Paul and Jones, Michael J.},
  title     = {Robust Real-Time Face Detection},
  journal   = {International Journal of Computer Vision},
  publisher = {Springer Science and Business Media {LLC}},
  volume    = {57},
  number    = {2},
  pages     = {137--154},
  year      = {2004},
  month     = may,
  doi       = {10.1023/b:visi.0000013087.49260.fb},
  url       = {https://doi.org/10.1023/b:visi.0000013087.49260.fb}
}
% The exported page range was the placeholder I-I; the page range below should be confirmed against the DOI record.
@inproceedings{990517,
  author    = {Viola, P. and Jones, M.},
  title     = {Rapid object detection using a boosted cascade of simple features},
  booktitle = {Proceedings of the 2001 {IEEE} Computer Society Conference on Computer Vision and Pattern Recognition. {CVPR} 2001},
  year      = {2001},
  volume    = {1},
  pages     = {I-511--I-518},
  doi       = {10.1109/CVPR.2001.990517}
}
@inproceedings{1467360,
  author    = {Dalal, N. and Triggs, B.},
  title     = {Histograms of oriented gradients for human detection},
  booktitle = {2005 {IEEE} Computer Society Conference on Computer Vision and Pattern Recognition ({CVPR}'05)},
  year      = {2005},
  volume    = {1},
  pages     = {886--893},
  doi       = {10.1109/CVPR.2005.177}
}
@article{GU2022104401,
  author   = {Gu, Wenchao and Bai, Shuang and Kong, Lingxing},
  title    = {A review on {2D} instance segmentation based on deep neural networks},
  journal  = {Image and Vision Computing},
  volume   = {120},
  pages    = {104401},
  year     = {2022},
  issn     = {0262-8856},
  doi      = {10.1016/j.imavis.2022.104401},
  url      = {https://www.sciencedirect.com/science/article/pii/S0262885622000300},
  keywords = {Instance segmentation, Deep neural networks, Computer vision, Review}
}
@article{9356353,
  author  = {Minaee, Shervin and Boykov, Yuri and Porikli, Fatih and Plaza, Antonio and Kehtarnavaz, Nasser and Terzopoulos, Demetri},
  title   = {Image Segmentation Using Deep Learning: A Survey},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2022},
  volume  = {44},
  number  = {7},
  pages   = {3523--3542},
  doi     = {10.1109/TPAMI.2021.3059968}
}
@inproceedings{10.1007/978-3-319-10584-0_20,
  author    = {Hariharan, Bharath and Arbel{\'a}ez, Pablo and Girshick, Ross and Malik, Jitendra},
  editor    = {Fleet, David and Pajdla, Tomas and Schiele, Bernt and Tuytelaars, Tinne},
  title     = {Simultaneous Detection and Segmentation},
  booktitle = {Computer Vision -- ECCV 2014},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2014},
  pages     = {297--312}
}
@misc{wang2023internimage,
  author        = {Wang, Wenhai and Dai, Jifeng and Chen, Zhe and Huang, Zhenhang and Li, Zhiqi and Zhu, Xizhou and Hu, Xiaowei and Lu, Tong and Lu, Lewei and Li, Hongsheng and Wang, Xiaogang and Qiao, Yu},
  title         = {{InternImage}: Exploring Large-Scale Vision Foundation Models with Deformable Convolutions},
  year          = {2023},
  eprint        = {2211.05778},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
@misc{liu2022convnet,
  author        = {Liu, Zhuang and Mao, Hanzi and Wu, Chao-Yuan and Feichtenhofer, Christoph and Darrell, Trevor and Xie, Saining},
  title         = {A {ConvNet} for the 2020s},
  year          = {2022},
  eprint        = {2201.03545},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
@misc{tan2021efficientnetv2,
  author        = {Tan, Mingxing and Le, Quoc V.},
  title         = {{EfficientNetV2}: Smaller Models and Faster Training},
  year          = {2021},
  eprint        = {2104.00298},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
% Same arXiv paper (1911.09070) as the entry keyed DBLP:journals/corr/abs-1911-09070; both keys are kept so existing citations still resolve.
@misc{tan2020efficientdet,
  author        = {Tan, Mingxing and Pang, Ruoming and Le, Quoc V.},
  title         = {{EfficientDet}: Scalable and Efficient Object Detection},
  year          = {2020},
  eprint        = {1911.09070},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
@article{Jaccard1912,
  author    = {Jaccard, Paul},
  title     = {The Distribution of the Flora in the {Alpine} Zone},
  journal   = {New Phytologist},
  publisher = {Wiley},
  volume    = {11},
  number    = {2},
  pages     = {37--50},
  year      = {1912},
  month     = feb,
  doi       = {10.1111/j.1469-8137.1912.tb05611.x},
  url       = {https://doi.org/10.1111/j.1469-8137.1912.tb05611.x}
}
% Volume 37 is taken from the e-periodica pid in the url (bsv-002:1901:37); the journal name should be confirmed against that record.
@article{10.5169/SEALS-266450,
  author    = {Jaccard, Paul},
  title     = {{\'E}tude comparative de la distribution florale dans une portion des {Alpes} et du {Jura}},
  journal   = {Bulletin de la Soci{\'e}t{\'e} Vaudoise des Sciences Naturelles},
  volume    = {37},
  publisher = {Imprimerie Corbaz \& Comp.},
  year      = {1901},
  doi       = {10.5169/SEALS-266450},
  url       = {https://www.e-periodica.ch/digbib/view?pid=bsv-002:1901:37::790}
}
@misc{lin2015microsoft,
  author        = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Bourdev, Lubomir and Girshick, Ross and Hays, James and Perona, Pietro and Ramanan, Deva and Zitnick, C. Lawrence and Doll{\'a}r, Piotr},
  title         = {{Microsoft} {COCO}: Common Objects in Context},
  year          = {2015},
  eprint        = {1405.0312},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
@article{DBLP:journals/corr/ZeilerF13,
  author     = {Zeiler, Matthew D. and Fergus, Rob},
  title      = {Visualizing and Understanding Convolutional Networks},
  journal    = {CoRR},
  volume     = {abs/1311.2901},
  year       = {2013},
  eprinttype = {arXiv},
  eprint     = {1311.2901},
  url        = {http://arxiv.org/abs/1311.2901},
  timestamp  = {Mon, 13 Aug 2018 16:48:37 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ZeilerF13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{Bengio2009,
  author    = {Bengio, Yoshua},
  title     = {Learning Deep Architectures for {AI}},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  publisher = {Now Publishers},
  volume    = {2},
  number    = {1},
  pages     = {1--127},
  year      = {2009},
  doi       = {10.1561/2200000006},
  url       = {https://doi.org/10.1561/2200000006}
}
@inproceedings{Jung_2021_ICCV,
  author    = {Jung, Hyungsik and Oh, Youngrock},
  title     = {Towards Better Explanations of Class Activation Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV})},
  month     = oct,
  year      = {2021},
  pages     = {1336--1344}
}
@article{DBLP:journals/corr/SelvarajuDVCPB16,
  author     = {Selvaraju, Ramprasaath R. and Das, Abhishek and Vedantam, Ramakrishna and Cogswell, Michael and Parikh, Devi and Batra, Dhruv},
  title      = {{Grad-CAM}: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization},
  journal    = {CoRR},
  volume     = {abs/1610.02391},
  year       = {2016},
  eprinttype = {arXiv},
  eprint     = {1610.02391},
  url        = {http://arxiv.org/abs/1610.02391},
  timestamp  = {Mon, 13 Aug 2018 16:46:58 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SelvarajuDVCPB16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1710-11063,
  author     = {Chattopadhyay, Aditya and Sarkar, Anirban and Howlader, Prantik and Balasubramanian, Vineeth N.},
  title      = {{Grad-CAM++}: Generalized Gradient-based Visual Explanations for Deep Convolutional Networks},
  journal    = {CoRR},
  volume     = {abs/1710.11063},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1710.11063},
  url        = {http://arxiv.org/abs/1710.11063},
  timestamp  = {Tue, 02 Aug 2022 09:11:19 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1710-11063.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ZhouKLOT15,
  author     = {Zhou, Bolei and Khosla, Aditya and Lapedriza, {\`{A}}gata and Oliva, Aude and Torralba, Antonio},
  title      = {Learning Deep Features for Discriminative Localization},
  journal    = {CoRR},
  volume     = {abs/1512.04150},
  year       = {2015},
  eprinttype = {arXiv},
  eprint     = {1512.04150},
  url        = {http://arxiv.org/abs/1512.04150},
  timestamp  = {Mon, 13 Aug 2018 16:47:46 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ZhouKLOT15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2008-02312,
  author     = {Fu, Ruigang and Hu, Qingyong and Dong, Xiaohu and Guo, Yulan and Gao, Yinghui and Li, Biao},
  title      = {Axiom-based {Grad-CAM}: Towards Accurate Visualization and Explanation of {CNNs}},
  journal    = {CoRR},
  volume     = {abs/2008.02312},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2008.02312},
  url        = {https://arxiv.org/abs/2008.02312},
  timestamp  = {Fri, 07 Aug 2020 15:07:21 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2008-02312.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-01279,
  author     = {Wang, Haofan and Du, Mengnan and Yang, Fan and Zhang, Zijian},
  title      = {{Score-CAM}: Improved Visual Explanations Via Score-Weighted Class Activation Mapping},
  journal    = {CoRR},
  volume     = {abs/1910.01279},
  year       = {2019},
  eprinttype = {arXiv},
  eprint     = {1910.01279},
  url        = {http://arxiv.org/abs/1910.01279},
  timestamp  = {Thu, 04 Feb 2021 15:37:59 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-01279.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{9093360,
  author    = {Desai, Saurabh and Ramaswamy, Harish G.},
  title     = {{Ablation-CAM}: Visual Explanations for Deep Convolutional Network via Gradient-free Localization},
  booktitle = {2020 {IEEE} Winter Conference on Applications of Computer Vision ({WACV})},
  year      = {2020},
  pages     = {972--980},
  doi       = {10.1109/WACV45572.2020.9093360}
}
@article{DBLP:journals/corr/abs-1912-01451,
  author     = {Tomsett, Richard and Harborne, Dan and Chakraborty, Supriyo and Gurram, Prudhvi and Preece, Alun D.},
  title      = {Sanity Checks for Saliency Metrics},
  journal    = {CoRR},
  volume     = {abs/1912.01451},
  year       = {2019},
  eprinttype = {arXiv},
  eprint     = {1912.01451},
  url        = {http://arxiv.org/abs/1912.01451},
  timestamp  = {Thu, 02 Jan 2020 18:08:18 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-01451.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SamekBMBM15,
  author     = {Samek, Wojciech and Binder, Alexander and Montavon, Gr{\'{e}}goire and Bach, Sebastian and M{\"{u}}ller, Klaus{-}Robert},
  title      = {Evaluating the visualization of what a Deep Neural Network has learned},
  journal    = {CoRR},
  volume     = {abs/1509.06321},
  year       = {2015},
  eprinttype = {arXiv},
  eprint     = {1509.06321},
  url        = {http://arxiv.org/abs/1509.06321},
  timestamp  = {Mon, 13 Aug 2018 16:46:08 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SamekBMBM15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-00449,
  author     = {Rong, Yao and Leemann, Tobias and Borisov, Vadim and Kasneci, Gjergji and Kasneci, Enkelejda},
  title      = {Evaluating Feature Attribution: An Information-Theoretic Perspective},
  journal    = {CoRR},
  volume     = {abs/2202.00449},
  year       = {2022},
  eprinttype = {arXiv},
  eprint     = {2202.00449},
  url        = {https://arxiv.org/abs/2202.00449},
  timestamp  = {Wed, 09 Feb 2022 15:43:35 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-00449.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv-only preprint, stored like the file's other arXiv entries (misc + eprint fields); primaryClass should be confirmed on the arXiv listing.
@misc{dumoulin2016guide,
  author        = {Dumoulin, Vincent and Visin, Francesco},
  title         = {A guide to convolution arithmetic for deep learning},
  year          = {2016},
  month         = mar,
  eprint        = {1603.07285},
  archivePrefix = {arXiv},
  primaryClass  = {stat.ML}
}
@article{DBLP:journals/corr/HeGDG17,
  author     = {He, Kaiming and Gkioxari, Georgia and Doll{\'{a}}r, Piotr and Girshick, Ross B.},
  title      = {Mask {R-CNN}},
  journal    = {CoRR},
  volume     = {abs/1703.06870},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1703.06870},
  url        = {http://arxiv.org/abs/1703.06870},
  timestamp  = {Mon, 13 Aug 2018 16:46:36 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HeGDG17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The MDPI article number is repeated in pages because styles ignore the article-number field.
@article{machines11070677,
  author         = {Hussain, Muhammad},
  title          = {{YOLO-v1} to {YOLO-v8}, the Rise of {YOLO} and Its Complementary Nature toward Digital Manufacturing and Industrial Defect Detection},
  journal        = {Machines},
  volume         = {11},
  year           = {2023},
  number         = {7},
  pages          = {677},
  article-number = {677},
  url            = {https://www.mdpi.com/2075-1702/11/7/677},
  issn           = {2075-1702},
  abstract       = {Since its inception in 2015, the YOLO (You Only Look Once) variant of object detectors has rapidly grown, with the latest release of YOLO-v8 in January 2023. YOLO variants are underpinned by the principle of real-time and high-classification performance, based on limited but efficient computational parameters. This principle has been found within the DNA of all YOLO variants with increasing intensity, as the variants evolve addressing the requirements of automated quality inspection within the industrial surface defect detection domain, such as the need for fast detection, high accuracy, and deployment onto constrained edge devices. This paper is the first to provide an in-depth review of the YOLO evolution from the original YOLO to the recent release (YOLO-v8) from the perspective of industrial manufacturing. The review explores the key architectural advancements proposed at each iteration, followed by examples of industrial deployment for surface defect detection endorsing its compatibility with industrial requirements.},
  doi            = {10.3390/machines11070677}
}
@article{DBLP:journals/corr/YosinskiCBL14,
  author     = {Yosinski, Jason and Clune, Jeff and Bengio, Yoshua and Lipson, Hod},
  title      = {How transferable are features in deep neural networks?},
  journal    = {CoRR},
  volume     = {abs/1411.1792},
  year       = {2014},
  eprinttype = {arXiv},
  eprint     = {1411.1792},
  url        = {http://arxiv.org/abs/1411.1792},
  timestamp  = {Mon, 13 Aug 2018 16:47:20 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/YosinskiCBL14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@misc{liu2021swin,
  author        = {Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
  title         = {{Swin} Transformer: Hierarchical Vision Transformer using Shifted Windows},
  year          = {2021},
  eprint        = {2103.14030},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
@article{Wang_2022,
  author    = {Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
  title     = {{PVT} v2: Improved baselines with {Pyramid Vision Transformer}},
  journal   = {Computational Visual Media},
  publisher = {Springer Science and Business Media {LLC}},
  volume    = {8},
  number    = {3},
  pages     = {415--424},
  year      = {2022},
  month     = mar,
  doi       = {10.1007/s41095-022-0274-8},
  url       = {https://doi.org/10.1007/s41095-022-0274-8}
}
@misc{zhai2022scaling,
  author        = {Zhai, Xiaohua and Kolesnikov, Alexander and Houlsby, Neil and Beyer, Lucas},
  title         = {Scaling Vision Transformers},
  year          = {2022},
  eprint        = {2106.04560},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
@misc{dosovitskiy2021image,
  author        = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil},
  title         = {An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
  year          = {2021},
  eprint        = {2010.11929},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
% Year is the first arXiv submission (eprint prefix 1706 = June 2017); the key is unchanged so existing citations still resolve.
@misc{vaswani2023attention,
  author        = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia},
  title         = {Attention Is All You Need},
  year          = {2017},
  eprint        = {1706.03762},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
@misc{zhu2018deformable,
  author        = {Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
  title         = {Deformable {ConvNets} v2: More Deformable, Better Results},
  year          = {2018},
  eprint        = {1811.11168},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
@misc{ba2016layer,
  author        = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E.},
  title         = {Layer Normalization},
  year          = {2016},
  eprint        = {1607.06450},
  archivePrefix = {arXiv},
  primaryClass  = {stat.ML}
}
% Year is the first arXiv submission (eprint prefix 1606 = June 2016); the key is unchanged so existing citations still resolve.
@misc{hendrycks2023gaussian,
  author        = {Hendrycks, Dan and Gimpel, Kevin},
  title         = {{Gaussian} Error Linear Units ({GELUs})},
  year          = {2016},
  eprint        = {1606.08415},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
% The last author is kept exactly as the export parses it (given name Li, family name Fei-Fei), only rewritten in comma form.
@inproceedings{5206848,
  author    = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
  title     = {{ImageNet}: A large-scale hierarchical image database},
  booktitle = {2009 {IEEE} Conference on Computer Vision and Pattern Recognition},
  year      = {2009},
  pages     = {248--255},
  doi       = {10.1109/CVPR.2009.5206848}
}
@misc{touvron2021training,
  author        = {Touvron, Hugo and Cord, Matthieu and Douze, Matthijs and Massa, Francisco and Sablayrolles, Alexandre and J{\'e}gou, Herv{\'e}},
  title         = {Training data-efficient image transformers \& distillation through attention},
  year          = {2021},
  eprint        = {2012.12877},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
@inproceedings{8100117,
  author    = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  title     = {Aggregated Residual Transformations for Deep Neural Networks},
  booktitle = {2017 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2017},
  pages     = {5987--5995},
  doi       = {10.1109/CVPR.2017.634}
}
@article{DBLP:journals/corr/abs-2005-14165,
  author     = {Brown, Tom B. and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie
                and Kaplan, Jared and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav
                and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert{-}Voss, Ariel
                and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya
                and Ziegler, Daniel M. and Wu, Jeffrey and Winter, Clemens and Hesse, Christopher
                and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott
                and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam
                and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
  title      = {Language Models are Few-Shot Learners},
  journal    = {CoRR},
  volume     = {abs/2005.14165},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2005.14165},
  url        = {https://arxiv.org/abs/2005.14165},
  timestamp  = {Thu, 25 May 2023 10:38:31 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-14165.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Percent signs in the abstract are escaped so that styles which print or pass through the abstract do not read them as TeX comments.
@inproceedings{pmlr-v97-tan19a,
  author    = {Tan, Mingxing and Le, Quoc},
  title     = {{EfficientNet}: Rethinking Model Scaling for Convolutional Neural Networks},
  booktitle = {Proceedings of the 36th International Conference on Machine Learning},
  editor    = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
  series    = {Proceedings of Machine Learning Research},
  volume    = {97},
  pages     = {6105--6114},
  year      = {2019},
  month     = jun,
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v97/tan19a/tan19a.pdf},
  url       = {https://proceedings.mlr.press/v97/tan19a.html},
  abstract  = {Convolutional Neural Networks (ConvNets) are commonly developed at a fixed resource budget, and then scaled up for better accuracy if more resources are given. In this paper, we systematically study model scaling and identify that carefully balancing network depth, width, and resolution can lead to better performance. Based on this observation, we propose a new scaling method that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound coefficient. We demonstrate the effectiveness of this method on MobileNets and ResNet. To go even further, we use neural architecture search to design a new baseline network and scale it up to obtain a family of models, called EfficientNets, which achieve much better accuracy and efficiency than previous ConvNets. In particular, our EfficientNet-B7 achieves state-of-the-art 84.4\% top-1 / 97.1\% top-5 accuracy on ImageNet, while being 8.4x smaller and 6.1x faster on inference than the best existing ConvNet (Huang et al., 2018). Our EfficientNets also transfer well and achieve state-of-the-art accuracy on CIFAR-100 (91.7\%), Flower (98.8\%), and 3 other transfer learning datasets, with an order of magnitude fewer parameters.}
}
@article{DBLP:journals/corr/abs-1801-04381,
  author     = {Sandler, Mark and Howard, Andrew G. and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang{-}Chieh},
  title      = {Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation},
  journal    = {CoRR},
  volume     = {abs/1801.04381},
  year       = {2018},
  eprinttype = {arXiv},
  eprint     = {1801.04381},
  url        = {http://arxiv.org/abs/1801.04381},
  timestamp  = {Tue, 12 Jan 2021 15:30:06 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1801-04381.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Blog post: year and month are taken from the /2019/08/ path segment of the url.
@misc{Gupta_Tan2019,
  author       = {Gupta, Suyog and Tan, Mingxing},
  title        = {{EfficientNet-EdgeTPU}: Creating accelerator-optimized neural networks with {AutoML}},
  howpublished = {Google Research Blog},
  year         = {2019},
  month        = aug,
  url          = {https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html}
}
% Same arXiv paper (1911.09070) as the entry keyed tan2020efficientdet; both keys are kept so existing citations still resolve.
@article{DBLP:journals/corr/abs-1911-09070,
  author     = {Tan, Mingxing and Pang, Ruoming and Le, Quoc V.},
  title      = {{EfficientDet}: Scalable and Efficient Object Detection},
  journal    = {CoRR},
  volume     = {abs/1911.09070},
  year       = {2019},
  eprinttype = {arXiv},
  eprint     = {1911.09070},
  url        = {http://arxiv.org/abs/1911.09070},
  timestamp  = {Tue, 03 Dec 2019 14:15:54 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-09070.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Lin_2017_CVPR,
  author    = {Lin, Tsung-Yi and Doll{\'a}r, Piotr and Girshick, Ross and He, Kaiming and Hariharan, Bharath and Belongie, Serge},
  title     = {Feature Pyramid Networks for Object Detection},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jul,
  year      = {2017}
}
@article{DBLP:journals/corr/abs-1803-01534,
  author     = {Liu, Shu and Qi, Lu and Qin, Haifang and Shi, Jianping and Jia, Jiaya},
  title      = {Path Aggregation Network for Instance Segmentation},
  journal    = {CoRR},
  volume     = {abs/1803.01534},
  year       = {2018},
  eprinttype = {arXiv},
  eprint     = {1803.01534},
  url        = {http://arxiv.org/abs/1803.01534},
  timestamp  = {Wed, 11 Sep 2019 15:40:23 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-01534.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1904-07392,
  author     = {Ghiasi, Golnaz and Lin, Tsung{-}Yi and Pang, Ruoming and Le, Quoc V.},
  title      = {{NAS-FPN:} Learning Scalable Feature Pyramid Architecture for Object Detection},
  journal    = {CoRR},
  volume     = {abs/1904.07392},
  year       = {2019},
  eprinttype = {arXiv},
  eprint     = {1904.07392},
  url        = {http://arxiv.org/abs/1904.07392},
  timestamp  = {Thu, 25 Apr 2019 13:55:01 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-07392.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{adelson1984pmi,
  author               = {Adelson, E. H. and Anderson, C. H. and Bergen, J. R. and Burt, P. J. and Ogden, J. M.},
  title                = {Pyramid methods in image processing},
  journal              = {RCA Engineer},
  volume               = {29},
  number               = {6},
  pages                = {33--41},
  year                 = {1984},
  keywords             = {deepzoom image ma10 processing pyramid},
  biburl               = {https://www.bibsonomy.org/bibtex/259dfac6a273a879eb5c33f0f5b740980/sac},
  added-at             = {2011-09-19T12:12:54.000+0200},
  timestamp            = {2011-09-19T12:12:54.000+0200},
  posted-at            = {2007-09-05 11:12:27},
  citeulike-article-id = {1622723},
  interhash            = {1b86abb78a10e821d19471cbc87bbe0e},
  intrahash            = {59dfac6a273a879eb5c33f0f5b740980},
  priority             = {0}
}
@article{DBLP:journals/corr/ZophL16,
  author     = {Zoph, Barret and Le, Quoc V.},
  title      = {Neural Architecture Search with Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1611.01578},
  year       = {2016},
  eprinttype = {arXiv},
  eprint     = {1611.01578},
  url        = {http://arxiv.org/abs/1611.01578},
  timestamp  = {Mon, 13 Aug 2018 16:46:24 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ZophL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ZophVSL17,
  author     = {Zoph, Barret and Vasudevan, Vijay and Shlens, Jonathon and Le, Quoc V.},
  title      = {Learning Transferable Architectures for Scalable Image Recognition},
  journal    = {CoRR},
  volume     = {abs/1707.07012},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1707.07012},
  url        = {http://arxiv.org/abs/1707.07012},
  timestamp  = {Mon, 13 Aug 2018 16:48:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ZophVSL17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/GirshickDDM13,
  author     = {Girshick, Ross B. and Donahue, Jeff and Darrell, Trevor and Malik, Jitendra},
  title      = {Rich feature hierarchies for accurate object detection and semantic segmentation},
  journal    = {CoRR},
  volume     = {abs/1311.2524},
  year       = {2013},
  eprinttype = {arXiv},
  eprint     = {1311.2524},
  url        = {http://arxiv.org/abs/1311.2524},
  timestamp  = {Mon, 13 Aug 2018 16:48:09 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GirshickDDM13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{Uijlings2013,
  author    = {Uijlings, J. R. R. and van de Sande, K. E. A. and Gevers, T. and Smeulders, A. W. M.},
  title     = {Selective Search for Object Recognition},
  journal   = {International Journal of Computer Vision},
  publisher = {Springer Science and Business Media {LLC}},
  volume    = {104},
  number    = {2},
  pages     = {154--171},
  year      = {2013},
  month     = apr,
  doi       = {10.1007/s11263-013-0620-5},
  url       = {https://doi.org/10.1007/s11263-013-0620-5}
}
@article{DBLP:journals/corr/Girshick15,
  author     = {Girshick, Ross B.},
  title      = {Fast {R-CNN}},
  journal    = {CoRR},
  volume     = {abs/1504.08083},
  year       = {2015},
  eprinttype = {arXiv},
  eprint     = {1504.08083},
  url        = {http://arxiv.org/abs/1504.08083},
  timestamp  = {Mon, 13 Aug 2018 16:49:11 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/Girshick15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/RenHG015,
  author     = {Ren, Shaoqing and He, Kaiming and Girshick, Ross B. and Sun, Jian},
  title      = {Faster {R-CNN:} Towards Real-Time Object Detection with Region Proposal Networks},
  journal    = {CoRR},
  volume     = {abs/1506.01497},
  year       = {2015},
  eprinttype = {arXiv},
  eprint     = {1506.01497},
  url        = {http://arxiv.org/abs/1506.01497},
  timestamp  = {Mon, 13 Aug 2018 16:46:02 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/RenHG015.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% LeCun, Bengio, Hinton (2015): journal article, Nature 521(7553).
@article{LeCun2015,
  author    = {Yann LeCun and Yoshua Bengio and Geoffrey Hinton},
  title     = {Deep learning},
  journal   = {Nature},
  publisher = {Springer Science and Business Media {LLC}},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  month     = may,
  year      = {2015},
  doi       = {10.1038/nature14539},
  url       = {https://doi.org/10.1038/nature14539},
}
% Gao, Lim, Jia (2018): journal article, Remote Sensing 10(2), article number 299.
@article{rs10020299,
  author         = {Gao, Qishuo and Lim, Samsung and Jia, Xiuping},
  title          = {Hyperspectral Image Classification Using Convolutional Neural Networks and Multiple Feature Learning},
  journal        = {Remote Sensing},
  volume         = {10},
  number         = {2},
  article-number = {299},
  year           = {2018},
  issn           = {2072-4292},
  doi            = {10.3390/rs10020299},
  url            = {https://www.mdpi.com/2072-4292/10/2/299},
  abstract       = {Convolutional neural networks (CNNs) have been extended to hyperspectral imagery (HSI) classification due to its better feature representation and high performance, whereas multiple feature learning has shown its effectiveness in computer vision areas. This paper proposes a novel framework that takes advantage of both CNNs and multiple feature learning to better predict the class labels for HSI pixels. We built a novel CNN architecture with various features extracted from the raw imagery as input. The network generates the corresponding relevant feature maps for the input, and the generated feature maps are fed into a concatenating layer to form a joint feature map. The obtained joint feature map is then input to the subsequent layers to predict the final labels for each hyperspectral pixel. The proposed method not only takes advantage of enhanced feature extraction from CNNs, but also fully exploits the spectral and spatial information jointly. The effectiveness of the proposed method is tested with three benchmark data sets, and the results show that the CNN-based multi-feature learning framework improves the classification accuracy significantly.},
}
% Lahat, Adali, Jutten (2015): journal article, Proceedings of the IEEE 103(9).
% The accented given name is written as a LaTeX escape so the entry also works
% with classic 8-bit BibTeX; the page range uses an en-dash (double hyphen).
@article{7214350,
  author  = {Lahat, Dana and Adali, T{\"u}lay and Jutten, Christian},
  title   = {Multimodal Data Fusion: An Overview of Methods, Challenges, and Prospects},
  journal = {Proceedings of the IEEE},
  volume  = {103},
  number  = {9},
  pages   = {1449--1477},
  year    = {2015},
  doi     = {10.1109/JPROC.2015.2460697},
}
% Krizhevsky, Sutskever, Hinton (2012): paper in Advances in Neural Information
% Processing Systems 25. "ImageNet" is brace-protected so sentence-casing styles
% keep its capitalisation; editors use the unambiguous "Last, First" form.
@inproceedings{NIPS2012_c399862d,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Pereira, F. and Burges, C. J. and Bottou, L. and Weinberger, K. Q.},
  volume    = {25},
  pages     = {1097--1105},
  publisher = {Curran Associates, Inc.},
  year      = {2012},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf},
}
% Uddin, Lee (2019): journal article, Sensors 19(7), article number 1599.
@article{s19071599,
  author         = {Uddin, Md Azher and Lee, Young-Koo},
  title          = {Feature Fusion of Deep Spatial Features and Handcrafted Spatiotemporal Features for Human Action Recognition},
  journal        = {Sensors},
  volume         = {19},
  number         = {7},
  article-number = {1599},
  year           = {2019},
  issn           = {1424-8220},
  pubmedid       = {30987018},
  doi            = {10.3390/s19071599},
  url            = {https://www.mdpi.com/1424-8220/19/7/1599},
  abstract       = {Human action recognition plays a significant part in the research community due to its emerging applications. A variety of approaches have been proposed to resolve this problem, however, several issues still need to be addressed. In action recognition, effectively extracting and aggregating the spatial-temporal information plays a vital role to describe a video. In this research, we propose a novel approach to recognize human actions by considering both deep spatial features and handcrafted spatiotemporal features. Firstly, we extract the deep spatial features by employing a state-of-the-art deep convolutional network, namely Inception-Resnet-v2. Secondly, we introduce a novel handcrafted feature descriptor, namely Weber’s law based Volume Local Gradient Ternary Pattern (WVLGTP), which brings out the spatiotemporal features. It also considers the shape information by using gradient operation. Furthermore, Weber’s law based threshold value and the ternary pattern based on an adaptive local threshold is presented to effectively handle the noisy center pixel value. Besides, a multi-resolution approach for WVLGTP based on an averaging scheme is also presented. Afterward, both these extracted features are concatenated and feed to the Support Vector Machine to perform the classification. Lastly, the extensive experimental analysis shows that our proposed method outperforms state-of-the-art approaches in terms of accuracy.},
}
% Huang, Liu, van der Maaten, Weinberger (2017): conference paper, CVPR 2017.
% Empty volume/number fields are omitted (they only produce "empty field"
% warnings); the lowercase particle "van der" lets BibTeX parse it as the
% von-part of the surname; acronyms in booktitle are brace-protected.
@inproceedings{8099726,
  author    = {Huang, Gao and Liu, Zhuang and van der Maaten, Laurens and Weinberger, Kilian Q.},
  title     = {Densely Connected Convolutional Networks},
  booktitle = {2017 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  pages     = {2261--2269},
  year      = {2017},
  doi       = {10.1109/CVPR.2017.243},
}
% Hatamizadeh et al. (2023): arXiv e-print 2206.09959 (cs.CV).
@misc{hatamizadeh2023global,
  author        = {Ali Hatamizadeh and Hongxu Yin and Greg Heinrich and Jan Kautz and Pavlo Molchanov},
  title         = {Global Context Vision Transformers},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2206.09959},
  primaryClass  = {cs.CV},
}
% Dai et al. (2020): arXiv preprint 2009.14082, dblp export.
@article{DBLP:journals/corr/abs-2009-14082,
  title      = {Attentional Feature Fusion},
  author     = {Yimian Dai and
                Fabian Gieseke and
                Stefan Oehmcke and
                Yiquan Wu and
                Kobus Barnard},
  journal    = {CoRR},
  volume     = {abs/2009.14082},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2009.14082},
  url        = {https://arxiv.org/abs/2009.14082},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2009-14082.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Wed, 30 Sep 2020 16:16:22 +0200},
}
% Liu, Rabinovich, Berg (2015): arXiv preprint 1506.04579, dblp export.
% "ParseNet" is brace-protected so styles that sentence-case titles do not
% turn it into "Parsenet".
@article{DBLP:journals/corr/LiuRB15,
  author     = {Wei Liu and
                Andrew Rabinovich and
                Alexander C. Berg},
  title      = {{ParseNet}: Looking Wider to See Better},
  journal    = {CoRR},
  volume     = {abs/1506.04579},
  year       = {2015},
  url        = {http://arxiv.org/abs/1506.04579},
  eprinttype = {arXiv},
  eprint     = {1506.04579},
  timestamp  = {Mon, 13 Aug 2018 16:48:41 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/LiuRB15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Hu, Shen, Sun (2017): arXiv preprint 1709.01507, dblp export.
@article{DBLP:journals/corr/abs-1709-01507,
  title      = {Squeeze-and-Excitation Networks},
  author     = {Jie Hu and
                Li Shen and
                Gang Sun},
  journal    = {CoRR},
  volume     = {abs/1709.01507},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1709.01507},
  url        = {http://arxiv.org/abs/1709.01507},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1709-01507.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Wed, 11 Aug 2021 09:47:11 +0200},
}
% Paszke et al. (2019): paper in Advances in Neural Information Processing
% Systems 32. "PyTorch" is brace-protected against sentence-casing. Editors use
% "Last, First" form so that "d'Alch{\'e}-Buc" is parsed as one surname (a
% leading lowercase "d..." token in "First Last" form is taken as a von-part),
% with a plain apostrophe and the accent as a BibTeX special character.
@inproceedings{NEURIPS2019_bdbca288,
  author    = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and Desmaison, Alban and Kopf, Andreas and Yang, Edward and DeVito, Zachary and Raison, Martin and Tejani, Alykhan and Chilamkurthy, Sasank and Steiner, Benoit and Fang, Lu and Bai, Junjie and Chintala, Soumith},
  title     = {{PyTorch}: An Imperative Style, High-Performance Deep Learning Library},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Wallach, H. and Larochelle, H. and Beygelzimer, A. and d'Alch{\'e}-Buc, F. and Fox, E. and Garnett, R.},
  volume    = {32},
  pages     = {8024--8035},
  publisher = {Curran Associates, Inc.},
  year      = {2019},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2019/file/bdbca288fee7f92f2bfa9f7012727740-Paper.pdf},
}
% Gildenblat and contributors (2021): software repository hosted on GitHub.
% "PyTorch" and "CAM" are brace-protected against sentence-casing; the
% non-personal author "contributors" is braced so it is kept as a literal.
% NOTE(review): howpublished relies on the \url macro, so the document must
% load a package that defines it (e.g. url or hyperref) -- confirm.
@misc{jacobgilpytorchcam,
  author       = {Jacob Gildenblat and {contributors}},
  title        = {{PyTorch} library for {CAM} methods},
  year         = {2021},
  publisher    = {GitHub},
  howpublished = {\url{https://github.com/jacobgil/pytorch-grad-cam}},
}