
Commit

Updates
artemisp committed Sep 7, 2024
1 parent ddc827f commit 7e97de3
Showing 7 changed files with 117 additions and 47 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -8,4 +8,5 @@ _site
.tweet-cache
Gemfile.lock
vendor
.history

112 changes: 86 additions & 26 deletions _bibliography/papers.bib
@@ -1,12 +1,11 @@
---
---
@article{yang2021visual,
bibtex_show={true},
title={Visual goal-step inference using wikiHow},
@inproceedings{yang2021visual,
title={Visual Goal-Step Inference using wikiHow},
author={Yang, Yue and Panagopoulou, Artemis and Lyu, Qing and Zhang, Li and Yatskar, Mark and Callison-Burch, Chris},
journal={arXiv preprint arXiv:2104.05845},
booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
pages={2167--2179},
year={2021}
}

@@ -33,34 +32,95 @@ @article{panagopoulouquakerbot
title={QuakerBot: A Household Dialog System Powered by Large Language Models},
year={2022},
author={Panagopoulou, Artemis and Cugini, Manni Arora Li Zhang Dimitri and You, Weiqiu and Zhou, Yue Yang Liyang and Hou, Yuxuan Wang Zhaoyi and Hwang, Alyssa and Martin, Lara and Callison-Burch, Sherry Shi Chris and Yatskar, Mark},
journal={Alexa Prize TaskBot Challenge Proceedings}
}

@article{yang2022visualizing,
bibtex_show={true},
title={Visualizing the Obvious: A Concreteness-based Ensemble Model for Noun Property Prediction},
author={Yang, Yue and Panagopoulou, Artemis and Apidianaki, Marianna and Yatskar, Mark and Callison-Burch, Chris},
journal={arXiv preprint arXiv:2210.12905},
year={2022}
@inproceedings{yang-etal-2022-visualizing,
title = "Visualizing the Obvious: A Concreteness-based Ensemble Model for Noun Property Prediction",
author = "Yang, Yue and
Panagopoulou, Artemis and
Apidianaki, Marianna and
Yatskar, Mark and
Callison-Burch, Chris",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.45",
doi = "10.18653/v1/2022.findings-emnlp.45",
pages = "638--655",
abstract = "Neural language models encode rich knowledge about entities and their relationships which can be extracted from their representations using probing. Common properties of nouns (e.g., red strawberries, small ant) are, however, more challenging to extract compared to other types of knowledge because they are rarely explicitly stated in texts.We hypothesize this to mainly be the case for perceptual properties which are obvious to the participants in the communication. We propose to extract these properties from images and use them in an ensemble model, in order to complement the information that is extracted from language models. We consider perceptual properties to be more concrete than abstract properties (e.g., interesting, flawless). We propose to use the adjectives{'} concreteness score as a lever to calibrate the contribution of each source (text vs. images). We evaluate our ensemble model in a ranking task where the actual properties of a noun need to be ranked higher than other non-relevant properties. Our results show that the proposed combination of text and images greatly improves noun property prediction compared to powerful text-based language models.",
}

@article{yang2022language,
bibtex_show={true},
title={Language in a Bottle: Language Model Guided Concept Bottlenecks for Interpretable Image Classification},
author={Yang, Yue and Panagopoulou, Artemis and Zhou, Shenghao and Jin, Daniel and Callison-Burch, Chris and Yatskar, Mark},
journal={arXiv preprint arXiv:2211.11158},
year={2022}
@InProceedings{Yang_2023_CVPR,
author = {Yang, Yue and Panagopoulou, Artemis and Zhou, Shenghao and Jin, Daniel and Callison-Burch, Chris and Yatskar, Mark},
title = {Language in a Bottle: Language Model Guided Concept Bottlenecks for Interpretable Image Classification},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2023},
pages = {19187-19197}
}

@inproceedings{chakrabarty-etal-2023-spy,
title = "{I} Spy a Metaphor: Large Language Models and Diffusion Models Co-Create Visual Metaphors",
author = "Chakrabarty, Tuhin and
Saakyan, Arkadiy and
Winn, Olivia and
Panagopoulou, Artemis and
Yang, Yue and
Apidianaki, Marianna and
Muresan, Smaranda",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.465",
doi = "10.18653/v1/2023.findings-acl.465",
pages = "7370--7388",
abstract = "Visual metaphors are powerful rhetorical devices used to persuade or communicate creative ideas through images. Similar to linguistic metaphors, they convey meaning implicitly through symbolism and juxtaposition of the symbols. We propose a new task of generating visual metaphors from linguistic metaphors. This is a challenging task for diffusion-based text-to-image models, such as DALL$\cdot$E 2, since it requires the ability to model implicit meaning and compositionality. We propose to solve the task through the collaboration between Large Language Models (LLMs) and Diffusion Models: Instruct GPT-3 (davinci-002) with Chain-of-Thought prompting generates text that represents a visual elaboration of the linguistic metaphor containing the implicit meaning and relevant objects, which is then used as input to the diffusion-based text-to-image models. Using a human-AI collaboration framework, where humans interact both with the LLM and the top-performing diffusion model, we create a high-quality dataset containing 6,476 visual metaphors for 1,540 linguistic metaphors and their associated visual elaborations. Evaluation by professional illustrators shows the promise of LLM-Diffusion Model collaboration for this task.To evaluate the utility of our Human-AI collaboration framework and the quality of our dataset, we perform both an intrinsic human-based evaluation and an extrinsic evaluation using visual entailment as a downstream task.",
}

@article{chakrabarty2023spy,
title={I spy a metaphor: Large language models and diffusion models co-create visual metaphors},
author={Chakrabarty, Tuhin and Saakyan, Arkadiy and Winn, Olivia and Panagopoulou, Artemis and Yang, Yue and Apidianaki, Marianna and Muresan, Smaranda},
journal={arXiv preprint arXiv:2305.14724},
year={2023}
@inproceedings{xue2024ulip,
title={Ulip-2: Towards scalable multimodal pre-training for 3d understanding},
author={Xue, Le and Yu, Ning and Zhang, Shu and Panagopoulou, Artemis and Li, Junnan and Mart{\'\i}n-Mart{\'\i}n, Roberto and Wu, Jiajun and Xiong, Caiming and Xu, Ran and Niebles, Juan Carlos and others},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={27091--27101},
year={2024}
}


@inproceedings{panagopoulou-etal-2024-evaluating,
title = "Evaluating Vision-Language Models on Bistable Images",
author = "Panagopoulou, Artemis and
Melkin, Coby and
Callison-Burch, Chris",
editor = "Kuribayashi, Tatsuki and
Rambelli, Giulia and
Takmaz, Ece and
Wicke, Philipp and
Oseki, Yohei",
booktitle = "Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.cmcl-1.2",
pages = "8--29",
abstract = "Bistable images, also known as ambiguous or reversible images, present visual stimuli that can be seen in two distinct interpretations, though not simultaneously, by the observer. In this study, we conduct the most extensive examination of vision-language models using bistable images to date. We manually gathered a dataset of 29 bistable images, along with their associated labels, and subjected them to 121 different manipulations in brightness, resolution, tint, and rotation. We evaluated twelve different models in both classification and generative tasks across six model architectures. Our findings reveal that, with the exception of models from the Idefics family and LLaVA1.5-13b, there is a pronounced preference for one interpretation over another among the models, and minimal variance under image manipulations, with few exceptions on image rotations. Additionally, we compared the models{'} preferences with humans, noting that the models do not exhibit the same continuity biases as humans and often diverge from human initial interpretations. We also investigated the influence of variations in prompts and the use of synonymous labels, discovering that these factors significantly affect model interpretations more than image manipulations showing a higher influence of the language priors on bistable image interpretations compared to image-text training data. All code and data is open sourced.",
}

@article{panagopoulou2023x,
title={X-InstructBLIP: A Framework for aligning X-Modal instruction-aware representations to LLMs and Emergent Cross-modal Reasoning},
title={X-instructblip: A framework for aligning x-modal instruction-aware representations to llms and emergent cross-modal reasoning},
author={Panagopoulou, Artemis and Xue, Le and Yu, Ning and Li, Junnan and Li, Dongxu and Joty, Shafiq and Xu, Ran and Savarese, Silvio and Xiong, Caiming and Niebles, Juan Carlos},
journal={arXiv preprint arXiv:2311.18799},
year={2023}
}
booktitle={European Conference on Computer Vision},
year={2024},
organization={Springer}
}
27 changes: 10 additions & 17 deletions _data/cv.yml
@@ -26,7 +26,7 @@
contents:
- title: Research Summer Intern
institution: Salesforce Research
year: 2023
year: 2023-2024
description:
- Conducted multimodal AI research under the supervision of Dr. Juan Carlos Niebles.
- title: Co-Founder
@@ -56,34 +56,27 @@
- title: Teaching
type: time_table
contents:
- title: Instructor
- title: Instructor, Introduction to Computer Science
institution: Prison Teaching Initiative, Princeton University, Southwoods Prison
course: Introduction to Computer Science
year: F22
- title: Instructor
- title: Instructor, Introduction to Python (Coding Club)
institution: Kohelet-Yeshiva
course: Introduction to Python (Coding Club)
year: F21 - Sp22
- title: Teaching Assistant
- title: Teaching Assistant, CIS530 - Natural Language Processing
institution: University of Pennsylvania, Computer and Information Science
course: CIS530 - Natural Language Processing
year: F22
- title: Teaching Assistant
year: F22, F24
- title: Teaching Assistant, CIS 700 - Interactive Fiction and Text Generation
institution: University of Pennsylvania, Computer and Information Science
course: CIS 700 - Interactive Fiction and Text Generation
year: Sp22
- title: Teaching Assistant
- title: Teaching Assistant, CIS 521 - Introduction to Artificial Intelligence
institution: University of Pennsylvania, Computer and Information Science
course: CIS 521 - Introduction to Artificial Intelligence
year: F021
- title: Teaching Assistant
- title: Teaching Assistant, MCIT 592 - Mathematical Foundations of Computer Science
institution: University of Pennsylvania, Computer and Information Science
course: MCIT 592 - Mathematical Foundations of Computer Science
year: Sum18-Sp19
- title: Teaching Assistant
- title: Teaching Assistant, CIS 262 - Automata, Computability, and Complexity
institution: University of Pennsylvania, Computer and Information Science
course: CIS 262 - Automata, Computability, and Complexity
year: Sp018
year: Sp18

- title: Honors and Awards
type: time_table
8 changes: 8 additions & 0 deletions _news/24_08_17.md
@@ -0,0 +1,8 @@
---
layout: post
date: 2024-08-17 07:59:00-0400
inline: true
related_posts: false
---

📢 **Announcement: Our paper [X-InstructBLIP: A Framework for aligning X-Modal instruction-aware representations to LLMs and Emergent Cross-modal Reasoning](https://arxiv.org/pdf/2311.18799) has been accepted to [ECCV 2024](https://eccv.ecva.net/Conferences/2024)!🎉**
8 changes: 8 additions & 0 deletions _news/24_08_29.md
@@ -0,0 +1,8 @@
---
layout: post
date: 2024-08-29 07:59:00-0400
inline: true
related_posts: false
---

📢 **Announcement: Our paper [Evaluating Vision-Language Models on Bistable Images](https://arxiv.org/abs/2405.19423) has received the Best Paper Award at [CMCL 2024](https://cmclorg.github.io)!🎉🏆**
8 changes: 4 additions & 4 deletions _pages/about.md
@@ -30,10 +30,10 @@ latest_posts: false # includes a list of the newest posts
selected_papers: false # includes a list of papers marked as "selected={true}"
social: false # includes social icons at the bottom of the page
---
I am a third year PhD student at the University of Pennsylvania working in the intersection of Natural Language Processing and Computer Vision under the supervision of Professor Chris Callison-Burch and Professor Mark Yatskar.
I am a fourth year PhD student at the University of Pennsylvania working in the intersection of Natural Language Processing and Computer Vision under the supervision of Professor Chris Callison-Burch and Professor Mark Yatskar.

My interest lies in the study of knowledge and its acquisition, encoding, and communication. I recognize that knowledge encompasses more than just language - especially for procedural information - and therefore my research explores the importance of multimodality in knowledge encoding and transmission. I examine the impact of sensory inputs and mental experiences on our understanding of the world. My higher-arching goal is to gain a deeper understanding of the relationship between knowledge, perception, and communication and how they can be utilized for a comprehensive view of the world.
My research focuses on advancing multimodal AI by integrating diverse modalities such as images, audio, video, text, and 3D. I address challenges in multimodal integration, benchmark development, and enhancing interpretability to build trustworthy models. My mission is to craft models that can see, listen, and comprehend with the nuance of perceptual coherence—models that are as robust as they are insightful, and as interpretable as they are performant, bringing us closer to a future where machines are not just tools, but reliable, insightful collaborators.

In addition to my academic pursuits, I have a strong passion for education. As a Teaching Assistant at the University of Pennsylvania, and through my community teaching experiences, I have acquired a teaching style that prioritizes creating a comfortable and inclusive environment for learning. I strive to challenge students with the beautiful and mentally stimulating concepts of mathematics, logic, and computer science, while also breaking down any mental barriers that may have been created from past negative experiences.
In addition to my academic pursuits, I have a strong passion for education. As a Teaching Assistant at the University of Pennsylvania, and through my community teaching experiences, I strive to challenge students with the beautiful and mentally stimulating concepts of mathematics, logic, and computer science, while also breaking down any mental barriers that may have been created from past negative experiences.

<b>I am convinced that computer science is a field accessible to all, no matter their background, identity, or prior experience. In our technology-driven society, enabling people from various walks of life to contribute to and shape the future of computer science is not just advantageous but vital for creating strong and inclusive technological solutions.</b>
<b>I am convinced that computer science is a field accessible to all, no matter their background, identity, or prior experience. In our technology-driven society, enabling people from various backgrounds and experiences to contribute to and shape the future of computer science is essential for creating strong and inclusive technological solutions.</b>
Binary file modified assets/pdf/resume.pdf
