From e9718b0cd0595c06f5fa72307bf349362cab9fa2 Mon Sep 17 00:00:00 2001 From: Joep Meindertsma Date: Thu, 4 Jan 2024 10:17:59 +0100 Subject: [PATCH 01/10] WIP scorecard --- src/lib/components/Scorecard.svelte | 56 +++++++++++++++++++++++++++++ src/routes/scorecard/+page.svelte | 13 +++++++ src/routes/scorecard/meta.ts | 10 ++++++ 3 files changed, 79 insertions(+) create mode 100644 src/lib/components/Scorecard.svelte create mode 100644 src/routes/scorecard/+page.svelte create mode 100644 src/routes/scorecard/meta.ts diff --git a/src/lib/components/Scorecard.svelte b/src/lib/components/Scorecard.svelte new file mode 100644 index 00000000..df1df426 --- /dev/null +++ b/src/lib/components/Scorecard.svelte @@ -0,0 +1,56 @@ + + + + + diff --git a/src/routes/scorecard/+page.svelte b/src/routes/scorecard/+page.svelte new file mode 100644 index 00000000..baccefb5 --- /dev/null +++ b/src/routes/scorecard/+page.svelte @@ -0,0 +1,13 @@ + + + + +

{title}

+

AI Companies are building some of the most impactful technology in the world.

+ diff --git a/src/routes/scorecard/meta.ts b/src/routes/scorecard/meta.ts new file mode 100644 index 00000000..20ed0ad4 --- /dev/null +++ b/src/routes/scorecard/meta.ts @@ -0,0 +1,10 @@ +import type { Post } from '$lib/types' + +export const meta: Post = { + title: 'AI Company Safety Scorecard', + description: + 'How much are AI companies doing to prevent disaster? This scorecard is a work in progress.', + date: '2024-01-04', + slug: 'scorecard', + categories: [] +} From e7441b4f18bc389645ddafb7f21689e84661c7f4 Mon Sep 17 00:00:00 2001 From: Joep Meindertsma Date: Thu, 4 Jan 2024 12:36:58 +0100 Subject: [PATCH 02/10] Better scorecard --- src/lib/components/Scorecard.svelte | 56 ------------------- src/posts/learn.md | 1 + src/routes/scorecard/+page.svelte | 16 +++++- src/routes/scorecard/Scorecard.svelte | 58 ++++++++++++++++++++ src/routes/scorecard/TableCell.svelte | 60 +++++++++++++++++++++ src/routes/scorecard/categories.ts | 22 ++++++++ src/routes/scorecard/companies.ts | 78 +++++++++++++++++++++++++++ src/routes/scorecard/types.ts | 21 ++++++++ 8 files changed, 254 insertions(+), 58 deletions(-) delete mode 100644 src/lib/components/Scorecard.svelte create mode 100644 src/routes/scorecard/Scorecard.svelte create mode 100644 src/routes/scorecard/TableCell.svelte create mode 100644 src/routes/scorecard/categories.ts create mode 100644 src/routes/scorecard/companies.ts create mode 100644 src/routes/scorecard/types.ts diff --git a/src/lib/components/Scorecard.svelte b/src/lib/components/Scorecard.svelte deleted file mode 100644 index df1df426..00000000 --- a/src/lib/components/Scorecard.svelte +++ /dev/null @@ -1,56 +0,0 @@ - - -
    - {#each companies as company (company.name)} -
  • -
    {company.name}
    -
    -
    - {company.acknowledge.explanation} -
    -
    {company.acknowledge.score}
    -
    -
  • - {/each} -
- - diff --git a/src/posts/learn.md b/src/posts/learn.md index 39ff718b..cfdc5ca5 100644 --- a/src/posts/learn.md +++ b/src/posts/learn.md @@ -47,6 +47,7 @@ Here are some resources to get you started. - [Human Compatible: Artificial Intelligence and the Problem of Control](https://www.goodreads.com/en/book/show/44767248) (Stuart Russell) - [Our Final Invention: Artificial Intelligence and the End of the Human Era](https://www.goodreads.com/en/book/show/17286699) (James Barrat) - [The Precipice: Existential Risk and the Future of Humanity](https://www.goodreads.com/en/book/show/50963653) (Toby Ord) +- [Uncontrollable](https://www.goodreads.com/book/show/202416160-uncontrollable) (Darren McKee) ## Courses diff --git a/src/routes/scorecard/+page.svelte b/src/routes/scorecard/+page.svelte index baccefb5..c12c2d36 100644 --- a/src/routes/scorecard/+page.svelte +++ b/src/routes/scorecard/+page.svelte @@ -1,6 +1,6 @@ + + + + + + + + + + {#each companies as company (company.name)} + + + {#each categories as category (category.name)} + + {/each} + + + {/each} + +
+ {#each categories as category (category.name)} + + {/each} + +
{company.name}{company.lobby.score + company.acknowledge.score + company.deployment.score}
+ + diff --git a/src/routes/scorecard/TableCell.svelte b/src/routes/scorecard/TableCell.svelte new file mode 100644 index 00000000..9fc72a20 --- /dev/null +++ b/src/routes/scorecard/TableCell.svelte @@ -0,0 +1,60 @@ + + + (showTooltip = true)} on:mouseout={() => (showTooltip = false)}> + {#if title !== undefined} +
+ {title} +
+ {/if} + {#if score !== undefined} +
+ {score} +
+ {/if} + {#if showExplanation} +
+ {explanation} +
+ {/if} + {#if !showExplanation && showTooltip} +
+ {explanation} +
+ {/if} + + + diff --git a/src/routes/scorecard/categories.ts b/src/routes/scorecard/categories.ts new file mode 100644 index 00000000..7aaf4327 --- /dev/null +++ b/src/routes/scorecard/categories.ts @@ -0,0 +1,22 @@ +import type { Category } from './types' + +export const categories: Category[] = [ + { + name: 'Acknowledge', + key: 'acknowledge', + explanation: + 'How well does the company acknowledge the risks of AI? Do they acknowledge all the risks? How long did it take them to acknowledge the risks?' + }, + { + name: 'Lobby', + key: 'lobby', + explanation: + 'How much does the company lobby for AI regulation? Do they lobby for deregulation or for stricter regulation?' + }, + { + name: 'Deployment', + key: 'deployment', + explanation: + 'How long does the company wait before deploying a new model? Do they wait until they have a good understanding of the risks of the model? Are their models tested thoroughly?' + } +] diff --git a/src/routes/scorecard/companies.ts b/src/routes/scorecard/companies.ts new file mode 100644 index 00000000..862f7522 --- /dev/null +++ b/src/routes/scorecard/companies.ts @@ -0,0 +1,78 @@ +import type { Company } from './types' + +export const companies: Company[] = [ + { + name: 'OpenAI', + acknowledge: { + explanation: + "Has now publicly acknowledged most of the AI risks, including existential risk. However, it took them many years to do so. Sam Altman wasn't honest about his 'worst nightmare' during the Senate hearing in May 2023.", + score: 7 + }, + lobby: { + explanation: + 'OpenAI has publicly called for stricter regulation, but also tried to severely weaken how the EU AI Act regulates frontier models.', + score: 4 + }, + deployment: { + explanation: 'GPT-4 was released 7 months after it finished training.', + score: 5 + } + }, + { + name: 'Google DeepMind', + acknowledge: { + explanation: + "Hassabis has now publicly acknowledged the existential risk from AI. They weren't as quick with this as OpenAI", + score: 6 + }, + lobby: { + // https://corporateeurope.org/en/2023/11/byte-byte + explanation: + 'They have lobbied to shift the burden of responsibility onto users of AI instead of the ones building the AI.', + score: 3 + }, + deployment: { + explanation: + 'When releasing Palm 2 in 2023, Google skipped any mention of safety in their release paper. However, with the release of Gemini in december 2023, they have written more extensively on this.', + score: 3 + } + }, + { + name: 'Microsoft', + acknowledge: { + explanation: + // https://www.euronews.com/my-europe/2023/06/29/microsoft-chief-says-ai-is-not-an-existential-risk-to-mankind-but-human-oversight-needed + 'Microsoft does not acknowledge the existential risk from AI.', + score: 2 + }, + lobby: { + explanation: + // https://corporateeurope.org/en/2023/11/byte-byte + 'Microsoft has lobbied to shift the burden of responsibility onto users of AI instead of the ones building the AI.', + score: 3 + }, + deployment: { + explanation: + "Microsoft released an unfinished, sometimes even unhinged Bing (based on OpenAI's GPT-4) in April 2023. It was embarrassing and dangerous. OpenAI urged Microsoft not to do this - they did it anyway.", + score: 0 + } + }, + { + name: 'Meta', + acknowledge: { + explanation: + "Meta's chief scientist Yann LeCun is one of the most notories AI risk deniers, one of the loudest voices in the field. On Twitter he often resorts to ad hominem attacks and refuses to engage honestly.", + score: 0 + }, + lobby: { + explanation: + "Meta doesn't seem to be as active lobbying politicians through backchannels as other companies. They are quite publicly calling for less regulation, though.", + score: 3 + }, + deployment: { + explanation: + 'Meta has leaked and released the weights of powerful AI models. They get some points for improving how much they worked on safety in their latest LLAMA 2 release.', + score: 2 + } + } +] diff --git a/src/routes/scorecard/types.ts b/src/routes/scorecard/types.ts new file mode 100644 index 00000000..c6816b8f --- /dev/null +++ b/src/routes/scorecard/types.ts @@ -0,0 +1,21 @@ +export type Company = { + name: string + title?: string + acknowledge: Score + lobby: Score + deployment: Score +} + +export type Score = { + explanation: string + /** 0 - 10*/ + score: number +} + +export type catagoryTypes = 'acknowledge' | 'lobby' | 'deployment' + +export type Category = { + name: string + key: catagoryTypes + explanation: string +} From 8619374d25fe0485bec471c9b9060ae5ff8d29ea Mon Sep 17 00:00:00 2001 From: Joep Meindertsma Date: Thu, 4 Jan 2024 13:05:52 +0100 Subject: [PATCH 03/10] Added companies, research category --- src/routes/scorecard/Scorecard.svelte | 24 ++++++--- src/routes/scorecard/TableCell.svelte | 21 ++++++-- src/routes/scorecard/categories.ts | 6 +++ src/routes/scorecard/companies.ts | 70 +++++++++++++++++++++++++-- src/routes/scorecard/types.ts | 4 +- 5 files changed, 108 insertions(+), 17 deletions(-) diff --git a/src/routes/scorecard/Scorecard.svelte b/src/routes/scorecard/Scorecard.svelte index 318597df..020460bf 100644 --- a/src/routes/scorecard/Scorecard.svelte +++ b/src/routes/scorecard/Scorecard.svelte @@ -26,15 +26,17 @@ {company.name} {#each categories as category (category.name)} - + {#if company[category.key] === undefined} + + {:else} + + {/if} {/each} - {company.lobby.score + company.acknowledge.score + company.deployment.score} + {company.totalScore} {/each} @@ -52,6 +54,12 @@ vertical-align: top; } + .total { + font-weight: bold; + color: var(--brand); + font-size: 1.2rem; + } + .name { font-weight: bold; } diff --git a/src/routes/scorecard/TableCell.svelte b/src/routes/scorecard/TableCell.svelte index 9fc72a20..8dc18859 100644 --- a/src/routes/scorecard/TableCell.svelte +++ b/src/routes/scorecard/TableCell.svelte @@ -1,9 +1,25 @@ (showTooltip = true)} on:mouseout={() => (showTooltip = false)}> @@ -13,7 +29,7 @@ {/if} {#if score !== undefined} -
+
{score}
{/if} @@ -34,7 +50,6 @@ font-weight: bold; } .score { - color: var(--brand); font-size: 1.2rem; font-weight: bold; } diff --git a/src/routes/scorecard/categories.ts b/src/routes/scorecard/categories.ts index 7aaf4327..74463a54 100644 --- a/src/routes/scorecard/categories.ts +++ b/src/routes/scorecard/categories.ts @@ -18,5 +18,11 @@ export const categories: Category[] = [ key: 'deployment', explanation: 'How long does the company wait before deploying a new model? Do they wait until they have a good understanding of the risks of the model? Are their models tested thoroughly?' + }, + { + name: 'Research', + key: 'research', + explanation: + 'Is the company doing research on AI safety? Are they doing research on how to mitigate the risks of AI?' } ] diff --git a/src/routes/scorecard/companies.ts b/src/routes/scorecard/companies.ts index 862f7522..7306ae86 100644 --- a/src/routes/scorecard/companies.ts +++ b/src/routes/scorecard/companies.ts @@ -1,6 +1,6 @@ import type { Company } from './types' -export const companies: Company[] = [ +const companiesSource: Company[] = [ { name: 'OpenAI', acknowledge: { @@ -14,8 +14,14 @@ export const companies: Company[] = [ score: 4 }, deployment: { - explanation: 'GPT-4 was released 7 months after it finished training.', - score: 5 + explanation: + 'GPT-4 was released 7 months after it finished training, during which they did a lot of safety work like red-teaming by ARC.', + score: 8 + }, + research: { + explanation: + 'OpenAI has published a lot of impactful AI safety research and has dedicated a substantial amount of resources to their "superalignement" project.', + score: 8 } }, { @@ -33,8 +39,12 @@ export const companies: Company[] = [ }, deployment: { explanation: - 'When releasing Palm 2 in 2023, Google skipped any mention of safety in their release paper. However, with the release of Gemini in december 2023, they have written more extensively on this.', - score: 3 + 'Google used to be very careful with releasing models, but that changed in 2023. When releasing Palm 2 in 2023, Google skipped any mention of safety in their release paper. However, with the release of Gemini in december 2023, they have written more extensively on this.', + score: 5 + }, + research: { + explanation: 'Google DeepMind has published quite a few impactful AI safety papers', + score: 5 } }, { @@ -55,6 +65,12 @@ export const companies: Company[] = [ explanation: "Microsoft released an unfinished, sometimes even unhinged Bing (based on OpenAI's GPT-4) in April 2023. It was embarrassing and dangerous. OpenAI urged Microsoft not to do this - they did it anyway.", score: 0 + }, + research: { + explanation: + // https://www.theverge.com/2023/3/13/23638823/microsoft-ethics-society-team-responsible-ai-layoffs + "Microsoft has published almost no safety research and recently laid off their 'ethics and society' team.", + score: 1 } }, { @@ -73,6 +89,50 @@ export const companies: Company[] = [ explanation: 'Meta has leaked and released the weights of powerful AI models. They get some points for improving how much they worked on safety in their latest LLAMA 2 release.', score: 2 + }, + research: { + explanation: + 'Meta has published almost no safety research. They have a few papers on adversarial examples, but nothing on existential risk.', + score: 1 + } + }, + { + name: 'Anthropic', + acknowledge: { + explanation: + 'Anthropic has publicly acknowledged and brought attention to many AI risks, including the existential risk. Their CEO Dario Amodei has been one of the most vocal proponents of AI safety.', + score: 9 + }, + lobby: { + // https://pitchbook.com/news/articles/generative-AI-Capitol-Hill-VC + explanation: + "Anthropic has spent (a little) money lobbying, but it's unclear what they are pushing for.", + score: 7 + }, + deployment: { + explanation: + 'Anthropic was very optimistic about Claude 2 being "unjailbreakable", which was disproved in minutes after releasing the model.', + score: 5 + }, + research: { + explanation: + 'Anthropic has published very important advancements in AI safety research, especially in the field of interpretability.', + score: 9 } } ] + +export const companies: Company[] = companiesSource + .map((company) => { + const { name, acknowledge, lobby, deployment, research } = company + const totalScore = acknowledge.score + lobby.score + deployment.score + research.score + return { + name, + acknowledge, + lobby, + deployment, + research, + totalScore + } + }) + .sort((a, b) => b.totalScore - a.totalScore) diff --git a/src/routes/scorecard/types.ts b/src/routes/scorecard/types.ts index c6816b8f..218bf254 100644 --- a/src/routes/scorecard/types.ts +++ b/src/routes/scorecard/types.ts @@ -4,6 +4,8 @@ export type Company = { acknowledge: Score lobby: Score deployment: Score + research: Score + totalScore?: number } export type Score = { @@ -12,7 +14,7 @@ export type Score = { score: number } -export type catagoryTypes = 'acknowledge' | 'lobby' | 'deployment' +export type catagoryTypes = 'acknowledge' | 'lobby' | 'deployment' | 'research' export type Category = { name: string From fea61b8466be32dd89e92fa398e28a2d5a2be7de Mon Sep 17 00:00:00 2001 From: Joep Meindertsma Date: Thu, 4 Jan 2024 13:29:30 +0100 Subject: [PATCH 04/10] Bigger table toggle --- src/routes/scorecard/Scorecard.svelte | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/routes/scorecard/Scorecard.svelte b/src/routes/scorecard/Scorecard.svelte index 020460bf..ed7ec4ae 100644 --- a/src/routes/scorecard/Scorecard.svelte +++ b/src/routes/scorecard/Scorecard.svelte @@ -11,7 +11,7 @@ Show explanations - +
@@ -45,7 +45,15 @@