From af001042c932d0a1c50e225f71308af1e206f842 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 20:21:39 +0200 Subject: [PATCH 1/9] Add xlsum --- .../templates/GEM/xlsum/templates.yaml | 54 +++++++++++++++++++ .../templates/GEM/xsum/templates.yaml | 2 +- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 promptsource/templates/GEM/xlsum/templates.yaml diff --git a/promptsource/templates/GEM/xlsum/templates.yaml b/promptsource/templates/GEM/xlsum/templates.yaml new file mode 100644 index 000000000..4b187c854 --- /dev/null +++ b/promptsource/templates/GEM/xlsum/templates.yaml @@ -0,0 +1,54 @@ +dataset: GEM/xlsum +templates: + 419726f2-7140-4ab6-a18d-a5f9cc709a47: !Template + answer_choices: null + id: 419726f2-7140-4ab6-a18d-a5f9cc709a47 + jinja: 'Doc to summarize: {{document}}\nSummary in the same language as the doc: ||| {{target}}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - ROUGE + - BLEU + original_task: true + name: docsummary + reference: '' + 6b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8: !Template + answer_choices: null + id: 6b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8 + jinja: 'Content: {{document}}\nThe previous content can be summarized as follows: ||| {{target}}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - ROUGE + - BLEU + original_task: true + name: prevcontent + reference: '' + 8d3584c5-8864-4d11-bce9-65499cdef4cb: !Template + answer_choices: null + id: 8d3584c5-8864-4d11-bce9-65499cdef4cb + jinja: '{{document}}\n\ntl;dr: ||| {{target}}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - ROUGE + - BLEU + original_task: true + name: tldr + reference: '' + ec0096ea-e9db-4e96-85b4-0740085fee55: !Template + answer_choices: null + id: ec0096ea-e9db-4e96-85b4-0740085fee55 + jinja: '{{document}} \n\nA good summary of the text above: ||| {{target}}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - ROUGE + - BLEU + original_task: true + name: goodsummary + reference: '' diff --git a/promptsource/templates/GEM/xsum/templates.yaml b/promptsource/templates/GEM/xsum/templates.yaml index c2aa8b80c..49a767148 100644 --- a/promptsource/templates/GEM/xsum/templates.yaml +++ b/promptsource/templates/GEM/xsum/templates.yaml @@ -3,7 +3,7 @@ templates: 019726f2-7140-4ab6-a18d-a5f9cc709a47: !Template answer_choices: null id: 019726f2-7140-4ab6-a18d-a5f9cc709a47 - jinja: 'Summarize: {{document}} ||| {{target}}' + jinja: 'Summarize: {{document}} Summary: ||| {{target}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] From 116014ffd136b26cf08fafd2a5c6fd835de2bbb2 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 20:23:56 +0200 Subject: [PATCH 2/9] Add subset --- promptsource/templates/GEM/xlsum/{ => english}/templates.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename promptsource/templates/GEM/xlsum/{ => english}/templates.yaml (98%) diff --git a/promptsource/templates/GEM/xlsum/templates.yaml b/promptsource/templates/GEM/xlsum/english/templates.yaml similarity index 98% rename from promptsource/templates/GEM/xlsum/templates.yaml rename to promptsource/templates/GEM/xlsum/english/templates.yaml index 4b187c854..0c7ef7dfd 100644 --- a/promptsource/templates/GEM/xlsum/templates.yaml +++ b/promptsource/templates/GEM/xlsum/english/templates.yaml @@ -1,5 +1,5 @@ dataset: GEM/xlsum -templates: +templates: english 419726f2-7140-4ab6-a18d-a5f9cc709a47: !Template answer_choices: null id: 419726f2-7140-4ab6-a18d-a5f9cc709a47 From 50ac11d05f0c4caa7648f5c504ab69c18cc0556f Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 22:17:30 +0200 Subject: [PATCH 3/9] Fix col names --- .../templates/GEM/xlsum/english/templates.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/promptsource/templates/GEM/xlsum/english/templates.yaml b/promptsource/templates/GEM/xlsum/english/templates.yaml index 0c7ef7dfd..e9fcdea07 100644 --- a/promptsource/templates/GEM/xlsum/english/templates.yaml +++ b/promptsource/templates/GEM/xlsum/english/templates.yaml @@ -3,7 +3,7 @@ templates: english 419726f2-7140-4ab6-a18d-a5f9cc709a47: !Template answer_choices: null id: 419726f2-7140-4ab6-a18d-a5f9cc709a47 - jinja: 'Doc to summarize: {{document}}\nSummary in the same language as the doc: ||| {{target}}' + jinja: 'Doc to summarize: {{text}}\nSummary in the same language as the doc: ||| {{summary}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -16,7 +16,7 @@ templates: english 6b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8: !Template answer_choices: null id: 6b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8 - jinja: 'Content: {{document}}\nThe previous content can be summarized as follows: ||| {{target}}' + jinja: 'Content: {{text}}\nThe previous content can be summarized as follows: ||| {{summary}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -29,7 +29,7 @@ templates: english 8d3584c5-8864-4d11-bce9-65499cdef4cb: !Template answer_choices: null id: 8d3584c5-8864-4d11-bce9-65499cdef4cb - jinja: '{{document}}\n\ntl;dr: ||| {{target}}' + jinja: '{{title}}\n{{text}}\n\ntl;dr: ||| {{summary}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -42,7 +42,7 @@ templates: english ec0096ea-e9db-4e96-85b4-0740085fee55: !Template answer_choices: null id: ec0096ea-e9db-4e96-85b4-0740085fee55 - jinja: '{{document}} \n\nA good summary of the text above: ||| {{target}}' + jinja: '{{text}} \n\nA good title for the article above: ||| {{title}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -50,5 +50,5 @@ templates: english - ROUGE - BLEU original_task: true - name: goodsummary + name: goodtitle reference: '' From da38ca9c135bfa33cbdf781940f68ee039df5adb Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 22:34:30 +0200 Subject: [PATCH 4/9] Fix format --- promptsource/templates/GEM/xlsum/english/templates.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/promptsource/templates/GEM/xlsum/english/templates.yaml b/promptsource/templates/GEM/xlsum/english/templates.yaml index e9fcdea07..f040d897a 100644 --- a/promptsource/templates/GEM/xlsum/english/templates.yaml +++ b/promptsource/templates/GEM/xlsum/english/templates.yaml @@ -1,5 +1,6 @@ dataset: GEM/xlsum -templates: english +subset: english +templates: 419726f2-7140-4ab6-a18d-a5f9cc709a47: !Template answer_choices: null id: 419726f2-7140-4ab6-a18d-a5f9cc709a47 From 4b4fe2ad0b95d4eff3778b1aba26ba5494f671fd Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Sun, 14 Aug 2022 22:40:53 +0200 Subject: [PATCH 5/9] fix col name target --- promptsource/templates/GEM/xlsum/english/templates.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/promptsource/templates/GEM/xlsum/english/templates.yaml b/promptsource/templates/GEM/xlsum/english/templates.yaml index f040d897a..e21389c07 100644 --- a/promptsource/templates/GEM/xlsum/english/templates.yaml +++ b/promptsource/templates/GEM/xlsum/english/templates.yaml @@ -4,7 +4,7 @@ templates: 419726f2-7140-4ab6-a18d-a5f9cc709a47: !Template answer_choices: null id: 419726f2-7140-4ab6-a18d-a5f9cc709a47 - jinja: 'Doc to summarize: {{text}}\nSummary in the same language as the doc: ||| {{summary}}' + jinja: 'Doc to summarize: {{text}}\nSummary in the same language as the doc: ||| {{target}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -17,7 +17,7 @@ templates: 6b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8: !Template answer_choices: null id: 6b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8 - jinja: 'Content: {{text}}\nThe previous content can be summarized as follows: ||| {{summary}}' + jinja: 'Content: {{text}}\nThe previous content can be summarized as follows: ||| {{target}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -30,7 +30,7 @@ templates: 8d3584c5-8864-4d11-bce9-65499cdef4cb: !Template answer_choices: null id: 8d3584c5-8864-4d11-bce9-65499cdef4cb - jinja: '{{title}}\n{{text}}\n\ntl;dr: ||| {{summary}}' + jinja: '{{title}}\n{{text}}\n\ntl;dr: ||| {{target}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -43,7 +43,7 @@ templates: ec0096ea-e9db-4e96-85b4-0740085fee55: !Template answer_choices: null id: ec0096ea-e9db-4e96-85b4-0740085fee55 - jinja: '{{text}} \n\nA good title for the article above: ||| {{title}}' + jinja: '{{text}} \n\nGive me a good title for the article above. ||| {{title}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] From d53f18ad3d63e16ea6392e5feba22b8ccb30dd02 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Mon, 15 Aug 2022 09:41:45 +0200 Subject: [PATCH 6/9] Add article gen --- .../GEM/xlsum/english/templates.yaml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/promptsource/templates/GEM/xlsum/english/templates.yaml b/promptsource/templates/GEM/xlsum/english/templates.yaml index e21389c07..c019f8f93 100644 --- a/promptsource/templates/GEM/xlsum/english/templates.yaml +++ b/promptsource/templates/GEM/xlsum/english/templates.yaml @@ -53,3 +53,30 @@ templates: original_task: true name: goodtitle reference: '' + dc0096ea-e9db-4e96-85b4-0740085fee55: !Template + answer_choices: null + id: dc0096ea-e9db-4e96-85b4-0740085fee55 + jinja: 'Given the below title and summary of an article, generate a long article to go along with them. + Title: {{title}}\nSummary: {{target}}\nArticle: ||| {{text}}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - ROUGE + - BLEU + original_task: true + name: genarticle + reference: '' + hc0096ea-e9db-4e96-85b4-0740085fee55: !Template + answer_choices: null + id: hc0096ea-e9db-4e96-85b4-0740085fee55 + jinja: 'Title: {{title}}\nGiven the above title of an imaginary article, generate the article.\n ||| {{text}}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: [] + metrics: + - ROUGE + - BLEU + original_task: true + name: imaginearticle + reference: '' From 42f9a3a6ecd326a831451a0c2d200dc6bef535d7 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Mon, 15 Aug 2022 09:57:38 +0200 Subject: [PATCH 7/9] Shorten texts & add gen article tasks --- .../templates/GEM/xlsum/english/templates.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/promptsource/templates/GEM/xlsum/english/templates.yaml b/promptsource/templates/GEM/xlsum/english/templates.yaml index c019f8f93..356b25848 100644 --- a/promptsource/templates/GEM/xlsum/english/templates.yaml +++ b/promptsource/templates/GEM/xlsum/english/templates.yaml @@ -4,7 +4,7 @@ templates: 419726f2-7140-4ab6-a18d-a5f9cc709a47: !Template answer_choices: null id: 419726f2-7140-4ab6-a18d-a5f9cc709a47 - jinja: 'Doc to summarize: {{text}}\nSummary in the same language as the doc: ||| {{target}}' + jinja: 'Doc to summarize: {{text[:8500]}}\nSummary in the same language as the doc: ||| {{target}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -17,7 +17,7 @@ templates: 6b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8: !Template answer_choices: null id: 6b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8 - jinja: 'Content: {{text}}\nThe previous content can be summarized as follows: ||| {{target}}' + jinja: 'Content: {{text[:7000]}}\nThe previous content can be summarized as follows: ||| {{target}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -30,7 +30,7 @@ templates: 8d3584c5-8864-4d11-bce9-65499cdef4cb: !Template answer_choices: null id: 8d3584c5-8864-4d11-bce9-65499cdef4cb - jinja: '{{title}}\n{{text}}\n\ntl;dr: ||| {{target}}' + jinja: '{{title}}\n{{text[:6000]}}\n\ntl;dr: ||| {{target}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -56,8 +56,8 @@ templates: dc0096ea-e9db-4e96-85b4-0740085fee55: !Template answer_choices: null id: dc0096ea-e9db-4e96-85b4-0740085fee55 - jinja: 'Given the below title and summary of an article, generate a long article to go along with them. - Title: {{title}}\nSummary: {{target}}\nArticle: ||| {{text}}' + jinja: 'Given the below title and summary of an article, generate an article of 500 characters at most to go along with them. + Title: {{title}}\nSummary: {{target}}\nArticle: ||| {{text[:500]}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] @@ -70,7 +70,7 @@ templates: hc0096ea-e9db-4e96-85b4-0740085fee55: !Template answer_choices: null id: hc0096ea-e9db-4e96-85b4-0740085fee55 - jinja: 'Title: {{title}}\nGiven the above title of an imaginary article, generate the article.\n ||| {{text}}' + jinja: 'Title: {{title}}\nGiven the above title of an imaginary article, imagine the article.\n ||| {{text[:7000]}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] From 811b863ab1706844ce5e38be808f32b2e2a11092 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Mon, 15 Aug 2022 10:05:59 +0200 Subject: [PATCH 8/9] Clarify beg of art --- promptsource/templates/GEM/xlsum/english/templates.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/promptsource/templates/GEM/xlsum/english/templates.yaml b/promptsource/templates/GEM/xlsum/english/templates.yaml index 356b25848..dbd16fdf5 100644 --- a/promptsource/templates/GEM/xlsum/english/templates.yaml +++ b/promptsource/templates/GEM/xlsum/english/templates.yaml @@ -56,8 +56,8 @@ templates: dc0096ea-e9db-4e96-85b4-0740085fee55: !Template answer_choices: null id: dc0096ea-e9db-4e96-85b4-0740085fee55 - jinja: 'Given the below title and summary of an article, generate an article of 500 characters at most to go along with them. - Title: {{title}}\nSummary: {{target}}\nArticle: ||| {{text[:500]}}' + jinja: 'Given the below title and summary of an article, generate a short article or the beginning of a long article to go along with them. + Title: {{title}}\nSummary: {{target}}\nArticle (Max 500 characters): ||| {{text[:500]}}' metadata: !TemplateMetadata choices_in_prompt: false languages: [] From 0faa0da4d6d5a91d927fe112e21ad34c40bd8b78 Mon Sep 17 00:00:00 2001 From: Muennighoff Date: Mon, 15 Aug 2022 10:12:27 +0200 Subject: [PATCH 9/9] Better lens --- promptsource/templates/GEM/xlsum/english/templates.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promptsource/templates/GEM/xlsum/english/templates.yaml b/promptsource/templates/GEM/xlsum/english/templates.yaml index dbd16fdf5..7c788ceaf 100644 --- a/promptsource/templates/GEM/xlsum/english/templates.yaml +++ b/promptsource/templates/GEM/xlsum/english/templates.yaml @@ -30,7 +30,7 @@ templates: 8d3584c5-8864-4d11-bce9-65499cdef4cb: !Template answer_choices: null id: 8d3584c5-8864-4d11-bce9-65499cdef4cb - jinja: '{{title}}\n{{text[:6000]}}\n\ntl;dr: ||| {{target}}' + jinja: '{{title}}\n{{text[:5000]}}\n\ntl;dr: ||| {{target}}' metadata: !TemplateMetadata choices_in_prompt: false languages: []