Skip to content

Commit

Permalink
changed DOIs
Browse files Browse the repository at this point in the history
  • Loading branch information
JaynouOliver committed May 31, 2024
1 parent 2b18cf4 commit b919df4
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 39 deletions.
2 changes: 1 addition & 1 deletion papers/Suvrakamal_Das/main.md
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ The combination of Mamba's language understanding capabilities and Scipy's scien
* **Enhancing Model Efficiency and Scalability:** Integrating Mamba with Scipy's optimization and parallelization techniques can potentially improve the efficiency and scalability of language models, enabling them to handle increasingly larger datasets and more complex scientific problems.
* **Advancing Scientific Computing through Interdisciplinary Collaboration:** The synergy between Mamba and Scipy fosters interdisciplinary collaboration between natural language processing researchers, scientific computing experts, and domain-specific scientists, paving the way for novel applications and pushing the boundaries of scientific computing.

The diverse range of models as U-Mamba [@ma2024umamba], Vision Mamba[@zhu2024vision], VMamba [@liu2024vmamba], MambaByte [@wang2024mambabyte], FourierMamba [@li2024fouriermamba], and Jamba [@lieber2024jamba], highlights the versatility and adaptability of the Mamba architecture. These variants have been designed to enhance efficiency, improve long-range dependency modeling, incorporate visual representations, explore token-free approaches, integrate Fourier learning, and hybridize with Transformer components.
The diverse range of models such as U-Mamba [@ma2024umamba], Vision Mamba [@zhu2024vision], VMamba [@liu2024vmamba], MambaByte [@wang2024mambabyte], and Jamba [@lieber2024jamba] highlights the versatility and adaptability of the Mamba architecture. These variants have been designed to enhance efficiency, improve long-range dependency modeling, incorporate visual representations, explore token-free approaches, and hybridize with Transformer components.

### Conclusion

Expand Down
81 changes: 43 additions & 38 deletions papers/Suvrakamal_Das/mybib.bib
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ @misc{gu2023mamba
year={2023},
eprint={2312.00752},
archivePrefix={arXiv},
primaryClass={cs.LG}
primaryClass={cs.LG},
DOI={https://doi.org/10.48550/arXiv.2312.00752}
}

@misc{vaswani2023attention,
Expand All @@ -28,7 +29,8 @@ @misc{vaswani2023attention
year={2023},
eprint={1706.03762},
archivePrefix={arXiv},
primaryClass={cs.CL}
primaryClass={cs.CL},
DOI={https://doi.org/10.48550/arXiv.1706.03762}
}

@misc{dao2023flashattention2,
Expand All @@ -37,7 +39,8 @@ @misc{dao2023flashattention2
year={2023},
eprint={2307.08691},
archivePrefix={arXiv},
primaryClass={cs.LG}
primaryClass={cs.LG},
DOI={https://doi.org/10.48550/arXiv.2307.08691}
}

@misc{dao2022flashattention,
Expand All @@ -55,7 +58,8 @@ @misc{gu2020hippo
year={2020},
eprint={2008.07669},
archivePrefix={arXiv},
primaryClass={cs.LG}
primaryClass={cs.LG},
DOI={https://doi.org/10.48550/arXiv.2008.07669}
}

@misc{gu2022efficiently,
Expand All @@ -64,7 +68,8 @@ @misc{gu2022efficiently
year={2022},
eprint={2111.00396},
archivePrefix={arXiv},
primaryClass={cs.LG}
primaryClass={cs.LG},
DOI={https://doi.org/10.48550/arXiv.2111.00396}
}

@misc{gu2022parameterization,
Expand All @@ -73,7 +78,8 @@ @misc{gu2022parameterization
year={2022},
eprint={2206.11893},
archivePrefix={arXiv},
primaryClass={cs.LG}
primaryClass={cs.LG},
DOI={https://doi.org/10.48550/arXiv.2206.11893}
}

@misc{martin2018parallelizing,
Expand All @@ -82,7 +88,8 @@ @misc{martin2018parallelizing
year={2018},
eprint={1709.04057},
archivePrefix={arXiv},
primaryClass={cs.NE}
primaryClass={cs.NE},
DOI={https://doi.org/10.48550/arXiv.1709.04057}
}

@misc{lu2023structured,
Expand All @@ -91,7 +98,8 @@ @misc{lu2023structured
year={2023},
eprint={2303.03982},
archivePrefix={arXiv},
primaryClass={cs.LG}
primaryClass={cs.LG},
DOI={https://doi.org/10.48550/arXiv.2303.03982}
}

@misc{peng2023rwkv,
Expand All @@ -100,7 +108,8 @@ @misc{peng2023rwkv
year={2023},
eprint={2305.13048},
archivePrefix={arXiv},
primaryClass={cs.CL}
primaryClass={cs.CL},
DOI={https://doi.org/10.48550/arXiv.2305.13048}
}

@misc{sun2023retentive,
Expand All @@ -109,7 +118,8 @@ @misc{sun2023retentive
year={2023},
eprint={2307.08621},
archivePrefix={arXiv},
primaryClass={cs.CL}
primaryClass={cs.CL},
DOI={https://doi.org/10.48550/arXiv.2307.08621}
}

@misc{elfwing2017sigmoidweighted,
Expand All @@ -118,7 +128,8 @@ @misc{elfwing2017sigmoidweighted
year={2017},
eprint={1702.03118},
archivePrefix={arXiv},
primaryClass={cs.LG}
primaryClass={cs.LG},
DOI={https://doi.org/10.48550/arXiv.1702.03118}
}

@misc{oshea2015introduction,
Expand All @@ -127,7 +138,8 @@ @misc{oshea2015introduction
year={2015},
eprint={1511.08458},
archivePrefix={arXiv},
primaryClass={cs.NE}
primaryClass={cs.NE},
DOI={https://doi.org/10.48550/arXiv.1511.08458}
}


Expand All @@ -137,7 +149,8 @@ @misc{lim2024parallelizing
year={2024},
eprint={2309.12252},
archivePrefix={arXiv},
primaryClass={cs.LG}
primaryClass={cs.LG},
DOI={https://doi.org/10.48550/arXiv.2309.12252}
}


Expand All @@ -148,15 +161,17 @@ @misc{ma2024umamba
year={2024},
eprint={2401.04722},
archivePrefix={arXiv},
primaryClass={eess.IV}
primaryClass={eess.IV},
DOI={https://doi.org/10.48550/arXiv.2401.04722}
}
@misc{zhu2024vision,
  title         = {Vision {Mamba}: Efficient Visual Representation Learning with Bidirectional State Space Model},
  author        = {Zhu, Lianghui and Liao, Bencheng and Zhang, Qian and Wang, Xinlong and Liu, Wenyu and Wang, Xinggang},
  year          = {2024},
  eprint        = {2401.09417},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  doi           = {10.48550/arXiv.2401.09417},
}

@misc{liu2024vmamba,
Expand All @@ -165,66 +180,56 @@ @misc{liu2024vmamba
year={2024},
eprint={2401.10166},
archivePrefix={arXiv},
primaryClass={cs.CV}
primaryClass={cs.CV},
DOI={https://doi.org/10.48550/arXiv.2401.10166}
}
@misc{wang2024mambabyte,
  title         = {{MambaByte}: Token-free Selective State Space Model},
  author        = {Wang, Junxiong and Gangavarapu, Tushaar and Yan, Jing Nathan and Rush, Alexander M.},
  year          = {2024},
  eprint        = {2401.13660},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  doi           = {10.48550/arXiv.2401.13660},
}

@misc{lieber2024jamba,
  title         = {{Jamba}: A Hybrid {Transformer-Mamba} Language Model},
  author        = {Lieber, Opher and Lenz, Barak and Bata, Hofit and Cohen, Gal and Osin, Jhonathan and Dalmedigos, Itay and Safahi, Erez and Meirom, Shaked and Belinkov, Yonatan and Shalev-Shwartz, Shai and Abend, Omri and Alon, Raz and Asida, Tomer and Bergman, Amir and Glozman, Roman and Gokhman, Michael and Manevich, Avashalom and Ratner, Nir and Rozen, Noam and Shwartz, Erez and Zusman, Mor and Shoham, Yoav},
  year          = {2024},
  eprint        = {2403.19887},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  doi           = {10.48550/arXiv.2403.19887},
}
@comment{Removed duplicate entry sun2023retentive (arXiv:2307.08621); the canonical entry with the same key appears earlier in this file, and repeated keys are an error under Biber and a warning under classic BibTeX.}
@misc{de2024griffin,
  title         = {{Griffin}: Mixing Gated Linear Recurrences with Local Attention for Efficient Language Models},
  author        = {De, Soham and Smith, Samuel L. and Fernando, Anushan and Botev, Aleksandar and Cristian-Muraru, George and Gu, Albert and Haroun, Ruba and Berrada, Leonard and Chen, Yutian and Srinivasan, Srivatsan and Desjardins, Guillaume and Doucet, Arnaud and Budden, David and Teh, Yee Whye and Pascanu, Razvan and De Freitas, Nando and Gulcehre, Caglar},
  year          = {2024},
  eprint        = {2402.19427},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/arXiv.2402.19427},
}
@misc{poli2023hyena,
  title         = {{Hyena} Hierarchy: Towards Larger Convolutional Language Models},
  author        = {Poli, Michael and Massaroli, Stefano and Nguyen, Eric and Fu, Daniel Y. and Dao, Tri and Baccus, Stephen and Bengio, Yoshua and Ermon, Stefano and R{\'e}, Christopher},
  year          = {2023},
  eprint        = {2302.10866},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/arXiv.2302.10866},
}
# These references may be helpful:
@inproceedings{jupyter,
abstract = {It is increasingly necessary for researchers in all fields to write computer code, and in order to reproduce research results, it is important that this code is published. We present Jupyter notebooks, a document format for publishing code, results and explanations in a form that is both readable and executable. We discuss various tools and use cases for notebook documents.},
author = {Kluyver, Thomas and Ragan-Kelley, Benjamin and Pérez, Fernando and Granger, Brian and Bussonnier, Matthias and Frederic, Jonathan and Kelley, Kyle and Hamrick, Jessica and Grout, Jason and Corlay, Sylvain and Ivanov, Paul and Avila, Damián and Abdalla, Safia and Willing, Carol and {Jupyter development team}},
Expand Down

0 comments on commit b919df4

Please sign in to comment.