diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..6a4c33c --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,90 @@ +name: Deploy to GitHub Pages & Gitee Page & Vercel + +on: + push: + branches: + - main + # 如果你想要进一步定义触发条件、路径等,可以查看文档 + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on + +jobs: + deploy: + env: + VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }} + VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }} + VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }} + LARK_CHATBOT_HOOK_URL: ${{ secrets.LARK_CHATBOT_HOOK_URL }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Deploy to Vercel + id: vercel-deployment + uses: amondnet/vercel-action@v25 + if: ${{ env.VERCEL_TOKEN && env.VERCEL_ORG_ID && env.VERCEL_PROJECT_ID }} + with: + vercel-token: ${{ secrets.VERCEL_TOKEN }} + github-token: ${{ secrets.GITHUB_TOKEN }} + vercel-org-id: ${{ secrets.VERCEL_ORG_ID }} + vercel-project-id: ${{ secrets.VERCEL_PROJECT_ID }} + working-directory: ./ + vercel-args: --prod + + - uses: pnpm/action-setup@v2 + with: + version: 8 + - uses: actions/setup-node@v3 + with: + node-version: 18 + cache: pnpm + + - name: Install dependencies + run: pnpm i + + - name: Build website for GitHub + run: pnpm build + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + personal_token: ${{ secrets.GITHUB_TOKEN }} + # 要发布到 `gh-pages` 分支的构建输出: + publish_dir: ./.vitepress/dist + force_orphan: true + + - name: Build website for gitee + run: VITE_DEPLOYMENT_ENVIRONMENT=gitee pnpm build + - name: Deploy to Gitee Pages + uses: peaceiris/actions-gh-pages@v3 + with: + personal_token: ${{ secrets.GITHUB_TOKEN }} + publish_branch: gitee # default: gh-pages + publish_dir: ./.vitepress/dist + force_orphan: true + + - name: Lark notification + uses: foxundermoon/feishu-action@v2 + if: ${{ env.LARK_CHATBOT_HOOK_URL }} + with: + url: ${{ secrets.LARK_CHATBOT_HOOK_URL 
}} + msg_type: post + content: | + post: + zh_cn: + title: Vercel 预览环境 + content: + - - tag: text + text: Git 仓库: + - tag: a + text: ${{ github.server_url }}/${{ github.repository }} + href: ${{ github.server_url }}/${{ github.repository }} + - - tag: text + text: 代码分支: + - tag: a + text: ${{ github.ref }} + href: ${{ github.server_url }}/${{ github.repository }}/tree/${{ github.ref_name }} + - - tag: text + text: 预览链接: + - tag: a + text: ${{ steps.vercel-deployment.outputs.preview-url }} + href: ${{ steps.vercel-deployment.outputs.preview-url }} + \ No newline at end of file diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml new file mode 100644 index 0000000..f0c61d2 --- /dev/null +++ b/.github/workflows/pull-request.yml @@ -0,0 +1,54 @@ +name: Pull Request +on: + push: + branches-ignore: + - main +jobs: + Build-and-Deploy: + env: + VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }} + VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }} + VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }} + LARK_CHATBOT_HOOK_URL: ${{ secrets.LARK_CHATBOT_HOOK_URL }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + if: ${{ env.VERCEL_TOKEN && env.VERCEL_ORG_ID && env.VERCEL_PROJECT_ID }} + + - name: Deploy to Vercel + id: vercel-deployment + uses: amondnet/vercel-action@v25 + if: ${{ env.VERCEL_TOKEN && env.VERCEL_ORG_ID && env.VERCEL_PROJECT_ID }} + with: + vercel-token: ${{ secrets.VERCEL_TOKEN }} + github-token: ${{ secrets.GITHUB_TOKEN }} + vercel-org-id: ${{ secrets.VERCEL_ORG_ID }} + vercel-project-id: ${{ secrets.VERCEL_PROJECT_ID }} + working-directory: ./ + + - name: Lark notification + uses: foxundermoon/feishu-action@v2 + if: ${{ env.LARK_CHATBOT_HOOK_URL }} + with: + url: ${{ secrets.LARK_CHATBOT_HOOK_URL }} + msg_type: post + content: | + post: + zh_cn: + title: Vercel 预览环境 + content: + - - tag: text + text: Git 仓库: + - tag: a + text: ${{ github.server_url }}/${{ github.repository }} + href: ${{ github.server_url }}/${{ 
github.repository }} + - - tag: text + text: 代码分支: + - tag: a + text: ${{ github.ref }} + href: ${{ github.server_url }}/${{ github.repository }}/tree/${{ github.ref_name }} + - - tag: text + text: 预览链接: + - tag: a + text: ${{ steps.vercel-deployment.outputs.preview-url }} + href: ${{ steps.vercel-deployment.outputs.preview-url }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..689e200 --- /dev/null +++ b/.gitignore @@ -0,0 +1,51 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/node_modules +/.pnp +.pnp.js + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# PWA +public/sw.js +public/sw.js.map +public/workbox-*.js +public/workbox-*.js.map +public/worker-*.js +public/worker-*.js.map + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# local env files +.env.local +.env.development.local +.env.test.local +.env.production.local + +# vercel +.vercel + +# typescript +*.tsbuildinfo + +# IDE +.vscode/settings.json + +.vitepress/dist +.vitepress/cache diff --git a/.vitepress/config/en.mts b/.vitepress/config/en.mts new file mode 100644 index 0000000..11a50f5 --- /dev/null +++ b/.vitepress/config/en.mts @@ -0,0 +1,37 @@ +import { defineConfig } from 'vitepress' + +// https://vitepress.dev/reference/site-config +export const en = defineConfig({ + lang: 'en-US', + title: "2023 COSR", + description: "2023 China Open Source Report", + + themeConfig: { + nav: [ + { text: 'Home', link: '/en' }, + { text: 'Annual report of previous years', link: 'https://kaiyuanshe.feishu.cn/wiki/wikcnUDeVll6PNzw900yPV71Sxd', target: '_blank' } + ], + + sidebar: [ + { + items: [ + { text: 'Preface', link: '/en/preface' }, + { text: 'OSS Questionnaire', link: '/en/questionnaire' }, + { text: 'OSS Data Analytics', link: '/en/data' }, + { text: 'OSS Commercialization', link: '/en/commercialization' }, + { text: 'OSS Chronicle', link: 
'/en/open-source-milestones' }, + ] + } + ], + + footer: { + message: 'Released under the CC BY-SA 4.0 License.', + copyright: 'Copyright © 2014-present KAIYUANSHE' + }, + + editLink: { + pattern: 'https://github.com/kaiyuanshe/2023-China-Open-Source-Report/edit/main/:path', + text: 'Edit this page on GitHub' + }, + }, +}) diff --git a/.vitepress/config/index.mts b/.vitepress/config/index.mts new file mode 100644 index 0000000..db5e10a --- /dev/null +++ b/.vitepress/config/index.mts @@ -0,0 +1,13 @@ +import { defineConfig } from 'vitepress' +import { shared } from './shared.mts' +import { en } from './en.mts' +import { zh } from './zh.mts' + +export default defineConfig({ + ...shared, + + locales: { + root: { label: '简体中文', ...zh }, + en: { label: 'English', ...en }, + }, +}) diff --git a/.vitepress/config/shared.mts b/.vitepress/config/shared.mts new file mode 100644 index 0000000..43881d7 --- /dev/null +++ b/.vitepress/config/shared.mts @@ -0,0 +1,47 @@ +import { defineConfig } from 'vitepress'; + +const deploymentEnvironment = process.env.VITE_DEPLOYMENT_ENVIRONMENT; + +export const shared = defineConfig({ + lastUpdated: true, + + base: deploymentEnvironment === 'gitee' ? '/2023-china-open-source-report/' : deploymentEnvironment === 'vercel' ? 
'' : '/2023-China-Open-Source-Report/', + + head: [ + ['link', { rel: 'icon', type: 'image/x-icon', href: '/image/China-Open-Source-Report.ico' }], + ['link', { rel: 'icon', type: 'image/x-icon', href: '/2023-China-Open-Source-Report/image/China-Open-Source-Report.ico' }], + [ + 'script', + { async: '', src: 'https://www.googletagmanager.com/gtag/js?id=G-7CSQ2KPB1F' } + ], + [ + 'script', + {}, + `window.dataLayer = window.dataLayer || []; + function gtag(){dataLayer.push(arguments);} + gtag('js', new Date()); + gtag('config', 'G-7CSQ2KPB1F');` + ] + ], + + themeConfig: { + logo: '/image/China-Open-Source-Report.png', + + socialLinks: [ + { icon: 'github', link: 'https://github.com/kaiyuanshe/2023-China-Open-Source-Report' }, + { + icon: { + svg: + `` + }, + link: 'https://gitee.com/kaiyuanshe/2023-China-Open-Source-Report' + } + ], + + search: { + provider: 'local' + }, + + externalLinkIcon: true, + } +}) diff --git a/.vitepress/config/zh.mts b/.vitepress/config/zh.mts new file mode 100644 index 0000000..bf22c62 --- /dev/null +++ b/.vitepress/config/zh.mts @@ -0,0 +1,46 @@ +import { defineConfig } from 'vitepress' + +// https://vitepress.dev/reference/site-config +export const zh = defineConfig({ + lang: 'zh-Hans', + title: "2023 中国开源年度报告", + description: "2023 中国开源年度报告", + + themeConfig: { + nav: [ + { text: '首页', link: '/' }, + { text: '往年年报', link: 'https://kaiyuanshe.feishu.cn/wiki/wikcnUDeVll6PNzw900yPV71Sxd', target: '_blank' } + ], + + sidebar: [ + { + items: [ + { text: '卷首语', link: '/preface' }, + { text: '问卷篇', link: '/questionnaire' }, + { text: '数据篇', link: '/data' }, + { text: '商业化篇', link: '/commercialization' }, + { text: '开源大事记', link: '/open-source-milestones' }, + ] + } + ], + + footer: { + message: 'Released under the CC BY-SA 4.0 License.', + copyright: 'Copyright © 2014-present KAIYUANSHE' + }, + + editLink: { + pattern: 'https://github.com/kaiyuanshe/2023-China-Open-Source-Report/edit/main/:path', + text: '在 GitHub 上编辑本页内容' + }, + + 
lastUpdated: { + text: '更新于', + }, + + docFooter: { + prev: '上一页', + next: '下一页' + }, + }, +}) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..db200c3 --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +Released under the CC BY-SA 4.0 License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..22ea5c0 --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# 2023 中国开源年度报告 + +![GitHub Repo stars](https://img.shields.io/github/stars/kaiyuanshe/2023-China-Open-Source-Report?style=for-the-badge) +![GitHub contributors](https://img.shields.io/github/contributors/kaiyuanshe/2023-China-Open-Source-Report?style=for-the-badge) +![GitHub last commit](https://img.shields.io/github/last-commit/kaiyuanshe/2023-China-Open-Source-Report?style=for-the-badge) + +在线地址:https://kaiyuanshe.github.io/2023-China-Open-Source-Report/ + +👏 如有问题,欢迎提 issue 或 PR。 + +## 文件目录 + +- 中文版 + - [卷首语](./preface.md) + - [问卷篇](./questionnaire.md) + - [数据篇](./data.md) + - [商业化篇](./commercialization.md) + - [开源大事记](./open-source-milestones.md) +- English + - [Preface](./en/preface.md) + - [OSS Questionnaire](./en/questionnaire.md) + - [OSS Data Analytics](./en/data.md) + - [OSS Commercialization](./en/commercialization.md) + - [OSS Chronicle](./en/open-source-milestones.md) + +## 许可证 + +Released under the CC BY-SA 4.0 License. diff --git a/commercialization.md b/commercialization.md new file mode 100644 index 0000000..16ec72c --- /dev/null +++ b/commercialization.md @@ -0,0 +1,1542 @@ +--- +outline: deep +--- + +# 商业化篇 + +## 一. 
概述 + +在过去两年的开源年度报告商业化篇中,介绍了开源软件商业化成功的底层驱动因素、开源软件公司可能的商业化路径、开源项目投资人判断标准以及案例分享。去年,结合当时市场环境下的一些趋势与变革,探讨了国内的开源项目探索全球化市场过程与商业化发展的驱动因素、挑战及实现路径,引发了许多开源伙伴的热烈讨论。 + +2022-2023 年,人工智能领域迎来了预训练大模型技术的大爆发,引发了全社会的广泛关注,并且可预见在未来将持续加深对生活、工作的影响。不难发现,在这次人工智能技术迭代的浪潮中,开源生态也为技术发展起了极大的推动作用,并且有不少开源模型以及开源项目在积极寻求商业化。但开源模型与传统的开源软件又有着众多差异。在这样的时代背景下,人工智能开源项目与开源模型的商业化发展,成为了值得深入研究与讨论的话题。 + +在商业化过程中,包括开源软件和开源模型在内的开源项目的安全可控性是企业用户非常关注的考量因素之一。结合当下技术发展趋势,对开源软件的安全,开源模型的可控,开源商业许可证的分析是值得关注的话题。 + +资本方是促进开源市场发展的重要参与方。对于投资机构来讲,在对一个开源项目进行判断时往往会综合考虑以下几点:在产品开发阶段,重点要看企业是否拥有代码所有权和控制权,以及是否具备国际竞争力;社区运营阶段,主要看企业是否具备足够强的运营能力;在商业化探索阶段,市场匹配能力与商业模式的成熟度会成为主要关注点。 + +作为领域内最早关注并持续耕耘开源的机构,云启资本曾在早期成功发掘并投资了 PingCAP、Zilliz、Jina AI、RisingWave Lab、TabbyML 等开源企业,并持续参与共建开源生态。 + +为了进一步丰富报告内容,今年非常荣幸联合开源社举办了系列闭门讨论 Meetup。我们和数十位行业嘉宾包括微软、谷歌、Apple、Meta、华为、百度等国内外大厂,斯坦福大学、上海交通大学、中科大、UCSD 等高校研究机构,以及国内外大量第一线的创业者们,共同围绕着开源商业化相关的 AI Infrastructure 的发展情况、开源大模型发展情况与数据安全进行深入探讨,部分精华交流收录进了本篇报告中。 + +本章内容由云启资本投资团队撰写,今年所探讨的话题都属于较为前沿的方向,讨论中不乏一些判断与预测,笔者结合工作中与从业者的探讨和展望,提出我们的看法,若有不周到之处或不同想法,欢迎和我们探讨交流。 + +主要内容包括: + +**开源生态助力 AI 快速发展** + +**开源安全挑战** + +**开源项目资本市场情况** + +## 二. 开源生态助力 AI 快速发展 + +### 2.1 预训练大模型迅速发展,开源功不可没 + +#### 2.1.1 预训练大模型发展迅猛 + +在过去的几年中,预训练大模型的发展是突破性的,它们已成为人工智能领域的一个重要标志。这些模型,不仅在规模上日益庞大,而且在智能处理能力上也取得了巨大的飞跃。从处理语言的复杂性到解析图像的细腻度,再到执行高级数据分析的深度,这些模型展现了前所未有的能力和精度。特别是在自然语言处理(NLP)领域,如 GPT 系列的预训练大模型,通过学习大量的文本数据,已经能够模拟复杂的人类语言,进行高质量的文本生成、翻译和理解。这些模型不仅在语言的流畅性上有了显著提升,而且在理解语境、捕捉细微的语言差异方面也表现出越来越强的能力。 + +此外,这些大型模型在执行复杂数据分析方面的表现也极为出色。它们能够从庞大的数据集中提取出有意义的模式和关联,为科研、金融分析、市场预测等多个领域提供支持。值得一提的是,这些模型的发展并不仅仅局限于它们自身的提升。随着这些模型的普及和应用,它们正在推动整个行业和全社会的技术进步,促进新应用的产生,如智能助手、自动化写作工具、先进的诊断系统等等。它们的发展为未来人工智能的应用和研究开辟了更多新的发展方向,预示着新一轮的技术革新的到来。 + +广大用户对于 AI 的热情迅速攀升,相比于 TikTok 的 9 个月,ChatGPT 只用了 2 个月便达到一亿用户。这不仅是一个商业上的巨大成功,也是人工智能技术发展史上的一个重要里程碑。 + +
+ +| ![image001](/image/commercialization/chapter_2/2-1.png) | +|-------------------------------------------------------| + +
+ +
图 2.1 各大应用到达 1 亿用户所用的时间(月份数)
+
+ +伴随着 AI 热度的不断增长,全球 AI 的市场规模也迅速增长,根据德勤的数据,在 2017-2022 年中,全球 AI 的市场规模年复合增长率达 23%,2025 年预计将达到七万亿美元的规模。 + +
+ +| ![image002](/image/commercialization/chapter_2/2-2.png) | +|-------------------------------------------------------| + +
+ +
图 2.2 全球 AI 市场规模(万亿美元)
+ +#### 2.1.2 开源力量助推 AI + +预训练模型能取得如此巨大的进步,来自开源生态的力量发挥了重要作用。其中不但包括来自学术界的研究支持,也有来自产业界的助力,在开源生态的共同努力之下,开源基座大模型的性能快速发展,逐渐比肩闭源。 + +**来自学术界的开源力量极大推动了 AI 技术的演进** + +自 2009 年普林斯顿大学发表 ImageNET 这一计算机视觉领域的里程碑论文后,AI 机器学习相关的论文开始逐渐增长,与之同时伴随着大量研究者们开源的算法不断被提出。2017 年,在 Arxiv 上 AI 机器学习论文已达 2.5 万余篇,伴随着横空出世的《Attention Is All You Need》提出了开源的 Transformer 模型,有关大模型的相关研究和论文进入了集中爆发的时间,自 2017 至 2023 的六年间,Arxiv 大模型相关的论文激增至 10 万余篇。这也极大带动了相关模型的开源进程,为之后的大模型技术爆发储备了理论基础。 + +
+ +| ![image003](/image/commercialization/chapter_2/2-3.png) | +|-------------------------------------------------------| +
+ +
图 2.3 Arxiv 上 AI / 机器学习相关论文的累计发表数量
+
+ +::: info 专家点评 +**姜宁**:这一洞察还是挺振奋的, 学术开源扮演非常大的作用。 +::: + +**产业界的开源力量助力大模型的快速发展** + +随着 ChatGPT 引爆大模型的热潮,越来越多技术人员投身至大模型的研究与开发。除了闭源产品外,也有着大量优秀的开源大模型引领着产业发展的潮流。2022 年的 Stable Diffusion 凭借其强大的文生图能力以及广大的社区力量,一经推出便迅速追赶上著名闭源文生图大模型 Midjourney,并且在某些层面已经呈现领先之势;以 Meta LLaMA 2 为代表的开源大语言模型强悍的能力,令谷歌研究人员感慨“我们没有护城河,OpenAI 也没有”;还有各个领域不断涌现出的开源佼佼者,如 Dolly、Falcon 等等。开源大模型以其强大的社区资源,更低廉的使用成本,迅速获得大量企业和个人用户的青睐,是推动大模型发展的不可或缺的力量。 + +
+ +| ![image004](/image/commercialization/chapter_2/2-4.png) | +|-------------------------------------------------------| + +
+ +
图 2.4 不断涌现的开源大模型
+
+ +**开源大模型的表现正在迅速追赶闭源** + +以 OpenAI ChatGPT4 为代表的闭源大模型起步较早,其参数量与各项性能指标在早期都呈现出优于开源模型的趋势。但是得益于开源模型广泛的社区贡献力量,以及开源公司本身强大的技术力量,开源大模型的性能正在迅速追赶上闭源大模型。如下图所示,最为成熟的 ChatGPT4 得分为 1,181,而推出不到 4 个月的 LLAMA2 模型也已经拿到了 1,051 的高分,二者仅有 11% 的差距(相较于 ChatGPT4)。同时令人欣喜的是,排名 4-9 位均为开源大模型。这说明开源大模型性能的快速发展并非个例,而是行业的趋势。相比于闭源大模型,开源大模型由于使用成本明显低于闭源模型 API,同时性能差距较小,因此开源大模型具有非常高的性价比,吸引着广大 B 端、C 端的用户,关于成本的讨论将在后文详细展开。 + +得益于开源模型的开源特性,使用者可以更方便地对大模型进行微调以适应不同的垂直应用场景。经过微调的大模型更加具有行业特性,相比于通用大模型也更加适合特定行业的应用,这是闭源模型所不具有的优点。 + +
+ +| ![image005](/image/commercialization/chapter_2/2-5.png) | +|-------------------------------------------------------| + +
+ +
图 2.5 ELO 基于用户反馈对于大模型的评级
+ +#### 2.1.3 大模型的三个层次 + +如下图所示,大模型的技术架构主要分为如下层次。其中开源已做出大量贡献的包括模型层、开发者工具(开发工具)层和应用层。每一层都有其独特的功能和重要性,共同构成了大型模型技术的完整架构,这将在后续的三个部分(2.2,2.3,2.4)详细讨论。 + +
+ +| ![image006](/image/commercialization/chapter_2/2-6.png) | +|-------------------------------------------------------| + +
+ +
图 2.6 大模型的技术层次
+
+ +- **模型层** + +模型层是整个架构的基础,包括了构成大模型的核心算法和计算框架,典型的模型如 GPT、Diffusion 等,是生成式 AI 的核心。这一层涉及到模型的训练,包括大量数据的预处理、特征提取、模型优化和参数调整等。模型层的关键是高效的算法设计和大规模的数据处理能力。 + +- **开发工具层** + +开发工具层提供了必要的工具和平台,以支持大模型的开发和部署。包括各种机器学习框架(如 TensorFlow、PyTorch)和 API,这些工具简化了模型的构建、训练和测试过程。开发工具层还可能包括为模型训练和部署提供支持的云服务和计算资源。此外,这一层还负责模型的版本控制、测试、维护和更新等。 + +- **应用层** + +应用层主要考虑如何将大模型能力接入实际应用中。在这一层,模型被集成到具体的业务场景中,如智能助理、自动化客服、个性化推荐系统等。应用层的关键在于如何将复杂的模型技术转化为用户友好、高效且有价值的应用,同时确保其良好的性能和可扩展性。 + +整体来看,这三个层次相互依赖,共同构成了大模型技术的完整架构,从模型的基本构建到具体应用的实现,每一层都扮演着各自重要的角色。接下来会详细讨论三层次各自对应的开源内容。 + +### 2.2 开源是助推基座模型发展的第二动力 + +#### 2.2.1 供给侧:集中力量,促进研发 + +**节省开发人员数量,集中研发能力** + +有底层基座模型研发能力的技术人员数量有限,开源才能促进更多上层功能的研发。根据工信部发布的相关数据,人工智能不同技术方向岗位的人才供需比均低于 0.4,我国 AI 人才还处于较为缺乏的状态。大模型由于相对前沿、技术难度大,要求开发人员对在底层算法、数据结构、算法优化等方面有较高的技术功底,因此相关人才更加缺乏,人员薪资要求较高。中小企业无力支撑模型自研团队,但同时又有部署大模型的需求,因此开源大模型有利于缓解中小企业的技术压力,使得更多的开发者和研究者能够直接访问到先进的 AI 技术,避免了从零开始构建模型的需要。这不仅节省了大量的开发时间,还降低了进入门槛,使得即使是资源有限的团队也能利用这些高级模型来开展研究和开发。 + +基于高效的预训练模型基座,开发者可以直接进行有针对性地创新和改进,而不是分散精力于基础架构的建设。这种集中精力于创新而非基础建设的做法,极大地推动了技术的快速进步和应用领域的扩展。同时,开源模型的共享也促进了知识和技术的传播,为全球的开发者提供了学习和合作的平台,这在推动全行业的整体进步方面起到了关键作用。 + +**节省算力,避免重复造轮子** + +随着基座大模型性能的不断增强,其参数量也不断增大,相比于 5 年前暴涨 1,000 倍。根据测算,ChatGPT 芯片需求为 3 万多片英伟达 A100 GPU,对应初始投入成本约为 8 亿美元,每日电费在 5 万美元左右。训练基座模型的算力需求越来越大,成本越来越高,因此重复造轮子是一件非常浪费资源的事情。再加上美国对英伟达 A100/H100 供给中国大陆的禁令,国内企业训练基座大模型愈发困难。此时开源预训练大模型便成为了非常好地选择,可以解决当下的窘境,让更多公司可以基于基座大模型进行二次开发。 + +大模型训练共需要四个步骤:预训练(pre-training),监督式微调(supervised fine tuning),奖励模型(reward modeling)和强化学习(reinforcement learning)。预训练的算力时长占据整个训练周期的 99% 以上,由此,开源模型可以帮助大模型平台开发者们直接跳过 99% 的成本步骤,从而将有限的资金、时间投入到更加有针对性的微调步骤中,这对于广大的应用层开发者是一个重大帮助。大量中小企业需要模型服务者为其量身定做模型,而开源生态恰恰可以为大模型的二次开发节省大量成本,因此可以推动大量初创公司的诞生。 + +
+ +| ![image007](/image/commercialization/chapter_2/2-7.png) | +|-------------------------------------------------------| + +
+ +
图 2.7 大模型参数量越来越大
+
+ +**开源可以探索更广泛的技术可能性** + +震惊世界的 Transformer 模型是否为最优解,目前没有答案;下一个更好的方向是否是 RNN(Recurrent Neural Network - 循环神经网络) 也依然存在疑问。但正是开源的生态,使得开发者可以在这棵 AI 大树的不同枝干上进行尝试,不同枝干都会凝聚着各种新生的开发力量,保证了技术发展的多样性,从而让人类对于大模型的探索不会拘泥于局部最优解,真正推动 AI 技术向各个方向不断发展。 + +#### 2.2.2 需求侧:降低门槛,抢占市场 + +**开源模型显著降低模型使用者的成本** + +虽然部署开源模型需要一定的初始投入成本,但随着使用量的增加,部署开源模型呈现出规模效应,使用成本相对闭源更加经济和可控。对于日均请求频次低于一定水平的使用场景,直接调用 API 花费较低;但是对于较高请求频次,部署开源模型成本更低,用户应当根据实际的使用量选择合适的方式。 +
+ +
+ +| ![image008](/image/commercialization/chapter_2/2-8.png) | +|-------------------------------------------------------| + +
+ +
图 2.8 调用 OpenAI API 和 AWS 云上部署开源模型的成本对比
+
+ +以直接调用 OpenAI 的 API 和公有云上部署 Flan UL2 模型对比为例: + +根据 OpenAI 官网最新数据,使用 ChatGPT4 模型,输入为 0.03 美元 / 1000 tokens,输出为 0.06 美元 / 1000 tokens。考虑输入与输出的关系,假定平均成本为 0.04 美元 / 1000 tokens。每个 token 约为一个英文单词的 3/4,一条请求内的 token 数量等于提示词 + 所生成的输出 token。假设一个文本块为 500 个单词,即约 670 个 token,那么一个文本块的成本为 +670×0.04/1000=0.0268 美元。 + +而如果基于 AWS 云端部署开源模型,以 AWS 发布的相关教程中提到的 200 亿参数的 Flan UL2 模型为例,其成本共分为三个部分: + +- 使用 AWS SageMaker 将模型部署为端点的固定成本,每小时约 5-6 美元,一天约 150 美元 +- 将 SageMaker 端点接入 AWS Lambda:假定 5s 内向用户返回响应,使用 128MB 内存。每条请求的价格为:5000×0.0000000021(128MB +每毫秒单价)=0.00001 美元 +- 通过 API Gateway 将此 Lambda 函数开放为 API:Gateway 的价格约为 1 美元 / 100 万条请求,即 0.000001 美元 / 每条请求。 + +基于以上数据,可以计算出在一天之内,当请求数量约为 5,600 条时,二者总成本相等。在请求数量达到 10 万条 / 天的时候,ChatGPT4 的使用成本约为 2,680 美元,而开源大模型的成本为 151 美元;在请求数量达到 100 万条 / 天时,ChatGPT4 的使用成本约为 26,800 美元,而开源大模型的成本为 161 美元。可以发现,随着请求量的增加,开源大模型对于成本的节约是显著的。 + +**开源提高模型的可解释性和透明度,降低技术采纳的门槛** + +开源模型比封闭模型更容易评估。开源模型开源了其预训练结果,有些甚至开源了其训练的数据集、模型架构等,这都有利于研究人员和使用者对大模型进行深入分析,了解其优缺点。科学家和开发者遍布全球,他们可以相互审查、评估、探究并理解基础原理,进而增强安全性、可靠性、可解释性和信任感。此外,广泛地分享知识对于推动技术进步极为重要,同时也有助于降低技术被误用的可能性;闭源模型通常只能通过性能测试进行评估,本身是一个 “黑箱”,其优缺点、适用场景等不能被清晰地测评,本身可解释性与透明度显著低于开源模型。 + +同时闭源模型存在着被质疑原创性的风险。使用者不能确定闭源模型是否真正为原创,导致可能存在的版权、技术支持可持续性问题的担忧。开源模型由于代码可查,因此可以清晰判断其原创性,令使用者更加信服。根据 Hugging Face 技术人员评论,相比闭源大模型的黑箱,像 Llama2 这样的开源模型,公布了训练数据、方法、标注等细节,「透明化的文章都出来了,代码也发出来了,用起来你放心,知道里面有什么」。 + +更高的可解释性和透明度,有利于增强使用者尤其是 B 端用户对于大模型的信任。 + +**企业用户可以通过开源基座模型实现特定需求** + +企业用户具有多种类的特定需求,如:行业特征微调、本地部署保证隐私等。 + +目前随着大模型参数量不断增加,训练成本不断攀升,一味地提高大模型参数量并不是提高性能的最优解;而针对于特定问题的微调,反而能迅速提高大模型的针对性性能,达到事半功倍的效果。例如微软基于 LLaMA2 微调出的数学开源大模型 WizardMath,其参数只有 700 亿,但是经过 GSM8k 数据集测试,WizardMath 数学能力直接击败了 ChatGPT、Claude Instant 1、PaLM 2-540B 等一众大模型,这充分说明了微调对于提升大模型专业解决能力的重要作用,也是开源大模型的一大优势。 +
+ +
+ +| ![image009](/image/commercialization/chapter_2/2-9.png) | +|-------------------------------------------------------| + +
+ +
图 2.9 WizardMath 的性能排名
+
+ +大量企业用户对数据隐私有极高要求,开源大模型本地部署的能力极大地保护了企业的隐私。企业在调用闭源大模型时,闭源模型始终部署在诸如 OpenAI 等公司服务器上,企业只能将自己的数据远程发送到大模型公司服务器,这对于企业的隐私保护非常不利,中国的企业还面临相关的合规问题。而开源大模型则可以实现本地部署,企业所有数据在公司内部处理,甚至可以离线处理,极大保护了企业的数据安全。 + +**开源模型有利于客户的长久体验** + +对于企业,建立一个很好的数据集,可以应对开源模型不断的迭代。开源模型可以针对企业特定数据进行微调,微调的数据集质量要求很高。对于开源模型的使用企业,花费成本得到的一个数据集,可以用来微调很多模型,即发挥以逸待劳的作用:企业使用的大模型可以随着技术的发展不断更换,例如由 LLaMA1 提升至 LLaMA2,但其花费成本制作的数据集却不用再更换,这样从长远来看,可以保证企业以更小的成本,实现模型能力的不断提升。 + +开源模型的更新速度很快,满足用户变化的需求。在开源社区研发力量的加持下,开源大模型的欠缺被迅速补足。LLaMA2 本身欠缺中文语料,导致在中文理解方面令人不甚满意;但是仅在 LLaMA2 开源次日,社区就出现了首个能下载、能运行的开源中文 LLaMA2 模型 “Chinese LLaMA2 7B”。充足的社区力量支持,可以满足用户不同的需求,而闭源公司通常无法如此全面地照顾到各类用户的不同需求。 + +**开源有助于抢占市场先机** + +开源模型由于进入门槛低的特点,用户更易接触,可以迅速拓展市场。Stable Diffusion,一款开源的图像生成模型,凭借其庞大的开发者社区和多元化的应用场景,已成为闭源文生图模型 MidJourney 的重要竞争对手。尽管在某些方面不及 MidJourney,但 Stable Diffusion 凭借其开源和免费的特点,在图像生成市场上占据了重要份额,成为领域内最受欢迎的模型之一。这一成功也为其背后的公司 RunwayML 和 Stability AI 带来了广泛的关注和投资。 + +#### 2.2.3 生态侧:汇聚多元,长久增长 + +**开源有利于大模型公司迅速抢占生态资源** + +开源模型的低门槛、易得性还会帮助模型迅速占领相关生态资源。Stable Diffusion 这个开源项目在全球范围内获得了众多自由开发者的积极响应和支持。其中,许多热心的程序员积极参与,为其打造了易于使用的图形用户界面(GUI)。大量的 LoRA 模块被开发出来,为 Stable Diffusion 提供加速出图、绘制更生动的图像等功能。大量相关应用软件也相继诞生,根据 Stable Diffusion 官方网站的数据显示,在 Stable Diffusion 2.0 版本发布一个月后,位于苹果应用商店前十名的应用中,就有四款是基于 Stable Diffusion 技术开发的人工智能绘画应用。繁荣的生态成为 Stable Diffusion 坚实的根基。 + +在开源大模型 LLaMA2 最初发布时,Github 上包含 “LLaMA” 关键词的项目有 5600 个,包含 “GPT4” 关键词的项目有 4100 个。经过两周,LLaMA 相关生态以明显更高的速度增长,其相关项目数量达到 6200 个,而 “GPT4” 相关项目为 4400 个。对于大模型公司,生态相当于市场、技术力量和发展的不竭动力。开源以其更低的门槛,能够比闭源模型更快地抢占生态资源。因此相关开源大模型公司应抓住这一优势,加强同社区开发者的沟通,为其提供足够的支持和帮助,促进相关模型生态的迅速发展。 + +**开源有利于大模型厂商撬动市场,获取商业同盟** + +LLaMA2 商用开源后,Meta 迅速与微软、高通达成合作。作为 OpenAI 的大股东,微软选择与开源厂商 Meta 达成合作更意味着开源成为了不可忽视的一股力量。对于未来的合作,Meta 表示微软 Azure 云服务的用户,在云上就可以直接微调部署 Llama2,微软表示,Llama2 已经针对 Windows 进行优化,直接可以在 Windows 本地运行。 + +二者的结合充分彰显了开源大模型与云厂商具有天然的合作基础。无独有偶,国内的开源大模型也有类似的趋势:百度的文心千帆、阿里的通义千问均为开源大模型,虽然用户使用开源大模型通常不需要付费,但用户们需要百度云和阿里云作为算力平台,需要为算力付费。 + +Meta 与高通的合作也预示着其在手机领域的扩展。开源大模型由于其受众面广,可以本地部署等优势,手机成为未来便捷使用大模型的重要载体。这也吸引着手机芯片厂商与开源模型厂商进行合作。 +
+综上所述,开源大模型以其广泛的触达作用,有利于背后公司寻找合作伙伴,撬动市场。 + +**开源可以调集广大的社区力量,汇聚多元的开发力量** + +广大的社区力量一直是开源的重要优势。如下图所示,Github 上生成式 AI 项目在 2022 年实现快速增长,自 1.7 万飙升至 6 万,迅速增长的社区不仅可以迅速为开源大模型的开发公司提供大量的技术反馈,还可以充分提升开源大模型的末端触达力,通过微调将开源模型应用于各种垂直领域,为大模型带来更多用户。 + +
+ +| ![image010](/image/commercialization/chapter_2/2-10.png) | +|-------------------------------------------------------| + +
+ +
图 2.10 开源社区 Github 上生成式 AI 相关项目的数量变化(信息源:Github)
+
+ +相比于闭源模型,开源大模型会收到来自不同地区、不同文化、不同技术背景开发者的贡献。如下图所示除了美国,来自中国、印度、日本、巴西等世界各地的贡献者,都为生成式 AI 的开源社区做出了巨大贡献。他们的加入将使得开源大模型更能适应于不同地区的风土人情:例如对应语言的微调、对应产业的微调、不同使用习惯的微调,从而提高了开源大模型的受众面。 +
+ +
+ +| ![image011](/image/commercialization/chapter_2/2-11.png) | +|-------------------------------------------------------| + +
+ +
图 2.11 生成式 AI 贡献者的地域分布 TOP10(信息源:Github)
+
+ +**国内开源基座模型蓬勃发展,紧跟全球领先步伐** + +基于国内的科技企业生态,国内的开源预训练基座大模型也正蓬勃发展,紧跟全球领先步伐。 + +6 月清华系 ChatGLM 升级到第二代,当时在中文圈(中文 C-Eval 榜单)里拿下了 “榜首” 的好成绩,10 月推出的 ChatGLM3 不仅在多模态层面性能直逼 GPT-4V,也是国内首个具备代码交互能力的大模型产品(Code Interpreter)。 + +同在 10 月,悟道天鹰 Aquila 大语言模型系列已经全面升级到 Aquila2,并且再添了 340 亿参数的 Aquila2-34B,当时在代码生成、考试、理解、推理、语言四个维度的 22 个评测基准上,Aquila2-34B 强势霸占了多个榜单 TOP 1。 + +11 月 6 日,李开复老师亲自带队的大模型创业公司零一万物,正式开源发布首款预训练大模型 Yi-34B,在包括 Hugging Face 的开源大模型排行榜(Open LLM Leaderboard)等多项排行榜中取得惊人成绩。 + +12 月,阿里云通义千问 720 亿参数的模型 Qwen-72B 力压 Llama 2 等国内外开源大模型,登顶全球最大模型社区 Hugging Face 的开源大模型排行榜(Open LLM Leaderboard)榜首。 + +国内的开源预训练基座大模型也远不止以上几个,蓬勃发展的开源预训练基座大模型生态令人可喜,其中不但有学术机构、互联网巨头,也有部分优秀的创业公司,报告末尾统计了目前已开源的模型的初创公司与模型情况。 + +#### 2.2.4 开源大模型的商业化实现路径 + +当前,我们正处在开源大模型技术快速发展的时代,这一领域虽然前景广阔,但也面临着显著的商业模式探索的挑战。本段落基于与从业者的交流、案例调研,尝试归纳出现阶段的一些商业化探索方向。 + +**提供支持服务** + +随着越来越多基础开源技术的出现,软件的复杂性和专业性都大幅度提高,用户对软件稳定性的需求也同步提升,需要专业的技术支持。此时便出现了以 +Redhat 为代表的企业开始尝试基于开源软件实现商业化运营,主要的商业模式为 “Support 支持服务”模式,为使用开源软件的客户提供付费的技术支持与咨询服务。目前的基座模型整体复杂度、专业度较高,用户同样需要专业的技术支持。 + +在大模型领域,智谱 AI 的商业模式与 Redhat 较为相似。其为企业提供自研大模型 ChatGLM 的本地私有化部署服务,提供高效的数据处理、模型训练和部署服务。提供智谱大模型文件和相关的工具包,用户可以自行训练微调和部署推理服务,在此之上智谱会提供部署应用相关的技术支持和咨询,以及模型的更新。通过该方案,企业可以实现数据的完全掌控和模型的安全运行。 +
+ +
+ +| ![image012](/image/commercialization/chapter_2/2-12.png) | +|-------------------------------------------------------| + +
+ +
图 2.12 智谱 AI 的私有化部署定价模式
+
+ +**提供云托管服务** + +自云计算技术发展以来,云增长持续超出预期。不断增长的对灵活和可扩展基础设施的需求推动了 IT 企业的云计算支出与全球范围内云渗透率的不断提高。在这样的技术背景下,用户对降低软件运维成本的需求不断增加。云托管服务是指通过 SaaS 使客户跳过内部部署直接将软件作为服务托管在云平台上。客户通过订阅 SaaS 服务,将前期高额的资本性支出转为小额的经常性支出,并在很大程度上缓解了运维压力。目前比较成功的开源软件公司包括 +Databricks、HashiCorp 等。 + +在大模型领域,智谱 AI 直接提供基于 ChatGLM 的标准 API 产品,以便客户快速构建自己专属的大模型应用,按照模型实际处理文本的 token 数量计量计费。该服务适用于对知识量、推理能力、创造力要求较高的场景,比如广告文案、小说写作、知识类写作、代码生成等,定价为:0.005 元 / 千 tokens。 + +同时智谱 AI 还提供超拟人大模型(支持基于人设的角色扮演、超长多轮的记忆、千人千面的角色对话)、向量大模型(将输入的文本信息进行向量化表示,以便于结合向量数据库,为大模型提供外部知识库,提高大模型推理的准确性)等 API 接口。 + +Hugging Face 也提供了云托管的商业模式。Hugging Face 平台托管了大量的开源模型,还提供了基于云的解决方案, Hugging Face Inference API,允许用户通过 API 轻松地在云端部署和运行这些模型。这种模式结合了开源模型的可访问性和云托管的便利性,用户可以按需求使用,无需自行设置和管理庞大的基础设施。 +
+ +
+ +| ![image013](/image/commercialization/chapter_2/2-13.png) | +|-------------------------------------------------------| + +
+ +
图 2.13 Hugging Face 云平台收费
+
+ +**基于基座模型开发商业应用** + +基于基座模型收取费用,指部分开源厂商本身的基座模型是免费开源的,但是厂商基于基座模型又开发出一系列商业应用,并针对商业应用进行收费的模式,典型案例如通义千问。 + +阿里云基于旗下的开源基座模型通义千问,开发出八大应用:通义听悟(语音识别)、通义晓蜜(提升客服效率)、通义智文(文字理解)、通义星尘(个性化角色)、通义灵码(辅助编程)、通义法睿(法律行业)、通义仁心(医药行业)、通义点金(金融行业)。这些应用都有对应的企业级付费模式。同时部分应用也包含个人端的收费模式,如通义听悟,主要提供会议纪要等语音转文字的相关服务,其收费标准主要基于音频时长计算。 +
+ +
+ +| ![image014](/image/commercialization/chapter_2/2-14.png) | +|-------------------------------------------------------| + +
+ +
图 2.14 通义听悟收费模式
+
+ +**“模型即服务” 的商业模式** + +模型即服务(英文简称:MaaS)最底层的含义是要把模型作为重要的生产元素,围绕模型的生命周期设计产品和技术,从模型的开发入手,包括数据处理、特征工程、模型的训练和调优、模型的服务等,提供各种各样的产品和技术。 + +阿里云发起的 “魔搭社区” 为 MaaS 的倡导者,为了实现 MaaS,阿里云进行了两方面的准备:一是提供模型仓库,收集模型,提供优质数据,还可针对业务场景调优。模型使用和算力需要结合在一起,以便提供快速体验模型,让广大开发者无需写代码就能快速体验模型的效果。二是提供抽象接口或 API 接口,以便开发者针对模型进行二次开发。在面对具体应用场景时,提供少样本或者零样本的方式,便于开发者对模型进行二次优化,真正让模型应用到不同的场景中。 +
+ +
+ +| ![image015](/image/commercialization/chapter_2/2-15.png) | +|-------------------------------------------------------| + +
+ +
图 2.15 阿里云:模型即服务
+
+ +**大模型商业模式需要勇于探索与尝试** + +目前,开源大模型公司商业路径尚未获得市场验证,因此大量公司都在积极探索不同的商业模式,而不拘泥于单一的定价策略。但到目前为止,还没有找到有效的商业模式来覆盖其高昂的开发和运维成本,从而导致它们在经济上的可持续性存疑。这一情况在一定程度上反映了这个新兴行业的特性:虽然技术上取得了突破性进展,但如何将这些技术转化为经济效益,仍是一个待解的问题。 + +然而,值得注意的是,尽管面临这样的挑战,开源大模型的兴起和发展仍然标志着一个新的业态的诞生。这个业态具有其独特的价值和潜力,为各种行业提供了前所未有的技术支持和创新可能性。在这个过程中,各方参与者(包括研究机构、企业、开发者和用户)都在积极探索,试图找到能够平衡技术创新与经济回报的模式。 + +这种探索并非一蹴而就,它需要时间、实验以及对市场和技术趋势的深入理解。我们可能会看到各种创新的商业模式出现,如上文提到的技术支持服务、云托管、MaaS 等。尽管当前这些开源大模型的商业模式尚未成熟,但正是这种探索和实验,将推动整个大模型领域向前发展,最终找到可持续增长且有利润回报的商业路径。 + +### 2.3 AI 开发者工具开源已成为行业阶段性共识 + +#### 2.3.1 开发者工具在 AI 产业链中发挥着重要作用 + +开发工具(Develop Tools)层,是 AI 大模型开发链条中重要的一环。如下图所示,开发工具层起到承上启下、链接中层的作用: + +对于承接算力资源,开发工具层起到了类似 PaaS 的作用。基于云平台帮助大模型开发者更加简便地部署算力、开发环境、调用以及分配资源,让其能够专注于模型开发的逻辑和功能,实现本身的创新。 + +对于链接预训练模型,开发工具层提供一系列工具加速模型层的开发,包括数据集清洗、标注工具等。 +
+ +
+ +| ![image016](/image/commercialization/chapter_2/2-16.png) | +|-------------------------------------------------------| + +
+ +
图 2.16 开发者工具在 AI 大模型产业链中的位置
+
+ +对于推动 AI 应用开发,开发者工具层对企业、个人开发者最终产品的开发部署都起到重要帮助作用。对于企业开发者,开发者工具帮助实现行业大模型的部署、以及模型的监控,以保证企业模型的正常运行。还有其他相关功能,包括模型评估、模型运行过程数据库推理和补充等。对于个人开发者,开发者工具帮助他们简化部署步骤、降低开发成本,激励了更多针对特定功能的微调模型的诞生,例如 Hugging Face 推出的 Autotrain 可以帮助开发者只需要点几下鼠标就能基于私人数据对开源模型进行微调。同时开发者工具也帮助建立终端用户与大模型 APP 之间的连接,甚至大模型在终端用户设备的部署。 + +随着开发工具的日益成熟和进步,越来越多的开发者开始涉足大模型的相关开发。这些工具不仅提高了开发效率,还降低了进入门槛,使得更多具有创新思维的人才能够参与到这个领域。从数据处理、模型训练到性能优化,这些工具为开发者提供了全面的支持。因此,我们见证了一个多样化、活跃的大模型开发社区的诞生,其中不乏一些前沿的项目和创新的应用。 +
+ +
+ +| ![image017](/image/commercialization/chapter_2/2-17.png) | +|-------------------------------------------------------| + +
+ +
图 2.17 越来越多的 AI 大模型开发者
+
+ +目前的大模型开发工具百花齐放,这些工具涵盖了从数据准备、模型构建、到性能调优的各个环节,不断推动着人工智能技术的前沿发展。有的工具专注于数据标注和清洗,让开发者能够更轻易地获得高质量数据;有的工具致力于提高微调效率,使得大模型更符合定制化需求;还有的工具负责大模型的运行监控,以便及时反馈给开发者、使用者。这些多样化的工具不仅促进了技术创新,也为开发者提供了更多选择,共同构建了一个充满活力和创造力的大模型开发生态系统。其中不乏有大量开源的优秀项目,为使用者和开源公司都带来了较大的效益。 +
+ +
+ +| ![image018](/image/commercialization/chapter_2/2-18.png) | +|-------------------------------------------------------| + +
+ +
图 2.18 大量开发工具覆盖大模型开发的不同层面
+
+ +#### 2.3.2 开发者工具开源有重要意义 + +**供给侧效益** + +开源开发者工具有利于使产品在不同场景中经历打磨洗礼、升级换代,有助于产品的快速成熟。开源开发者工具的一个主要优势在于它们提供了一个广泛的测试和应用环境。由于开源工具可供不同的用户和组织自由使用和修改,这些工具经常在多样化的实际场景中得到应用和测试,从而经受“实战”的考验。这种广泛的使用和反馈有助于产品更快地发现并修复潜在的缺陷,同时也促进了新功能的开发和现有功能的改进。尤其是对于初创开发者工具公司来说,这是最快和最经济获得产品反馈、促进产品改进的方式,有助于快速向市场推出较为成熟的商业化产品。 + +开发者工具类底层产品用户粘性高,开源有利于迅速铺开市场。前文提到,开发者工具包含大量大模型开发过程中不可缺少的组成部分。一旦开发者习惯了特定的工具,他们往往会持续使用这些工具,因为改变工具意味着需要重新学习和适应新工具的特性和用法。因此,这类产品自然具有较高的用户粘性。 +
+ +| ![image019](/image/commercialization/chapter_2/2-19.png) | +|-------------------------------------------------------| + +
图 2.19 开源开发工具用户粘性高
+
+ +图为各大SaaS产品净收入留存率,本质反映的是老客户的留存率,持续付费能力,和对产品的忠实度。开发者产品粘性普遍高于中位数,Snowflake174%位居榜首,Hashicorp、Gitlab、Confluent等也超过120%。 + +由此可见,在如此高粘性的背景下,越快的获客速率代表着未来更高的收入。当这些工具以开源形式提供时,它们可以更快速地被广泛采用,因为开源降低了尝试和采纳新工具的门槛。这种快速的市场扩张对于建立品牌知名度和用户基础至关重要。 + +**需求侧效益** + +开源开发者工具减小了中小企业进入大模型市场的成本,方便他们把精力更多地集中在应用层的开发上。对于中小企业来说,进入大规模模型和复杂系统的开发市场往往需要巨大的技术投入和资金支持。开源开发者工具降低了这一门槛,因为它们通常免费、或整体价格较为低廉,并且包含了大量已经过验证的功能和组件。中小企业可以利用这些资源来开发和测试自己的产品,而无需从头开始开发所有基础组件。如此,它们可以将更多的资源和精力集中在应用层面的创新和特定业务需求的解决方案上,而不是在构建基础技术上耗费大量时间和资金。这不仅降低了进入市场的成本,而且加快了产品开发的速度,使中小企业能够更有效地与大公司竞争。 + +由于开源开发工具的生态效应,它们的技术迭代速度通常超过了闭源工具。在这样的开源生态中,实验室的最新研究成果能迅速被集成和共享,这样的机制保证了技术的快速更新和传播。开源社区的活跃参与促进了创新思想和技术的快速交流,使得最新的开发工具和技术成果能够即时地被广大开发者所了解和使用。这种开源文化的优势在于它的开放性和协作性,为开发者提供了一个接触和利用最先进工具的快捷方便途径。它不仅加速了技术的发展,也为个体开发者或小团队提供了与大型企业竞争的机会,从而推动了整个技术领域的健康发展和创新。 + +#### 2.3.3 开发者工具开源需重视生态搭建 + +**做好开发者工具开源需要维持社区生态稳定的技术支持** + +开源开发工具依赖于社区和合作伙伴提供的支持和维护。这一点对于确保工具的稳定性和可靠性至关重要。例如,一个开源数据库管理系统的成功不仅取决于其功能,还取决于社区能否及时响应用户报告的问题并提供修复。同时,合作伙伴和生态系统中的用户提供的市场反馈对于开源开发工具的优化至关重要。如果一款开源的代码分析工具在企业环境中广泛使用,那么这些企业用户的反馈将直接影响工具的未来发展方向。这种反馈可以帮助开发者了解哪些功能最受欢迎、哪些需要改进,从而使工具更贴合市场需求。 + +**开源开发者工具需要与云厂商优势互补,扩大市场覆盖和用户基础** + +开发者工具本身要基于云厂商提供的平台进行部署,其优势在于专业性、技术实力强;而云厂商的优势则在于提供刚需的算力平台以及本身较为广泛的用户基础。二者合作开发者工具,开发商可以借助于云厂商提供更好的算力优惠吸引更多使用者,同时得益于云厂商本身的销售渠道,也能获得更强的终端触达力。这种良性循环有助于将开源开发工具推广到更广泛的用户群体。这不仅增加了工具的知名度,也为工具的实际应用和改进提供了更多的机会。更多的用户意味着更多的反馈,这反过来又促进了工具的持续优化和适应不断变化的市场需求。 + +以 MongoDB 为例,它很早便进行云转型,推出了 SaaS 服务 Atlas,尽管在 2017 年 MongoDB 上市时,Atlas 的收入只占总收入的 1%,在当时 MongoDB 已经打造基于 Open Core 模式的全部体系,但 MongoDB 依然花费大量资源打造 SaaS 相关产品和营销体系,之后 Atlas 的收入便以超过 40% 的年复合增长率飞快增长。相比之下其竞争对手 CouchBase 则过于依赖传统模式,花费很多精力做移动平台支持服务,该服务市场增长缓慢,将公司拖入了尾大不掉的泥潭。以 SaaS 服务为基础的产品体系对于当下的开发者工具厂商非常重要,需要重视与云厂商的合作。 +
+ +
+ +| ![image020](/image/commercialization/chapter_2/2-20.png) | +|-------------------------------------------------------| + +
+ +
图 2.20 MongoDB 各产品销售收入
+
+ +**建立生态有利于构建开源行业标准** + +开发者工具作为底层工具层,其对上层模型开发的原理架构具有决定性作用。与云厂商、开源模型厂商等合作伙伴间的协作有助于形成共识,建立行业标准,这对于确保开发工具的互操作性、兼容性以及用户体验的一致性至关重要。标准化可以减少兼容性问题,使不同的产品和服务能够更容易地集成和使用。例如 MongoDB 借助社区力量形成了 NoSQL RDMS 的行业标准。这个活跃的社区不仅为 MongoDB 早期的商业版带来了高质低价的许可证,也成为日后 Atlas (managed service) 的基础。Milvus 基于开源社区协作,推出了 Vector DB Bench(可以通过测量关键指标来衡量向量数据库的性能,使得向量数据库发挥出最大的潜能),从而逐渐建立起向量数据库的行业标准,方便用户针对性地选择适合需求的向量数据库。 +
+ +
+ +| ![image021](/image/commercialization/chapter_2/2-21.png) | +|-------------------------------------------------------| + +
+ +
图 2.21 向量数据库评价结果
+
+ +#### 2.3.4 开源开发者工具商业化路径探索 + +AI 开发者工具,与传统软件开发者工具在商业化维度上有可借鉴性,整体商业化还处于早期探索阶段,基于对目前已经尝试商业化的开发者工具开源项目的研究分析,发现目前有以下几种商业路径: + +**云上托管服务(Cloud Hosting Managed Service)- 按量计费** + +随着云计算的普及,已经有越来越多的开发者工具默认通过云上的托管资源,直接为用户提供服务。这样的云上托管服务既可以降低用户的使用门槛,也可以直接提供最新最专业的产品服务,在没有数据、安全、隐私的顾虑下,是开发工具开源项目不错的商业化选择。 + +在云上托管服务的商业模式下,越来越多的项目选择按量计费的商业模式(Consumption-Based Pricing)。按量计费通常也根据产品的不同,可以以算力资源、数据量、请求数等作为计费单位。 + +Hugging Face 推出的 AutoTrain 是一款可以根据用户提供数据集,自动选择适合模型并进行微调的平台,可选择模型类别包括:文本分类、文本回归、实体识别、摘要、问答、翻译和表格。为非研究人员提供了训练高性能 NLP 模型并快速有效地大规模部署的能力。AutoTrain 的计费规则未公开,而是基于训练数据和模型变体的数量,在训练前收取预估费用。 + +Scale AI 公司主要提供数据标注产品,定价模式较为简洁,Scale Image 起价为每张图片 2 美分,每条标注 6 美分;Scale Video 起价为每帧视频 13 美分,每条标注 3 美分;Scale Text 起价为每项任务 5 美分,每条标注 3 美分;Scale Document AI 起价为每项任务 2 美分,每条标注 7 美分。除此之外,还有针对企业的收费方式,即根据具体的企业级项目的数据量及服务进行收费。 + +**云上托管服务(Cloud Hosting Managed Service)- 分级订阅计费** + +有部分开发工具层项目,同样使用云上托管服务,但以按年或者按月的方式提供订阅服务。 +
+ +
+ +| ![image022](/image/commercialization/chapter_2/2-22.png) | +|-------------------------------------------------------| + +
+ +
图 2.22 Dify.AI 订阅售价
+
+ +订阅的商业模式下,为了根据用户的不同需求与付费意愿,可以采用不同的分级,以做到成本与价格的平衡。以上图 Dify.ai 公司为例,针对不同体量的用户分级售价:针对个人用户有免费版,但考虑到成本费用,设定了诸多限制;针对专业个人开发者与小型团队,以较低价格解锁了部分限制,但依然存在着使用上限;针对中型团队,以较高价格提供相对完整的服务。 + +但无论是按量计费还是分级订阅的托管云服务,都只能提供标准化的产品服务,并且数据需要流向公有云。在一些大型企业侧,这样的商业模式依然存在私有化、定制化的需求。 + +**私有云 / 专有云 / 定制化部署** + +虽然越来越多的项目直接利用云上托管的服务,但当大型企业需要有更多私有化、定制化的需求时,云上托管服务就不再是可选项。 + +通常这样的商业模式下,项目也会为用户提供不同的选择。云上私有部署(Bring your own cloud)的模式在北美市场非常流行,而本地化部署(On-Premise)的场景更能满足对数据合规更敏感的场景。 + +开发工具层的开源项目商业化,经常出现提供包括以上三种商业模式在内的多种选择,这个可以理解为,这一层面的客户需求呈现出多样性与复杂性,并且在商业模式探索上,各个项目也在不同路径上同步尝试,未来的发展方向值得长期持续关注。 + +#### 2.3.5 开发者工具侧开源的成功案例 + +Zilliz 是研发面向人工智能的新一代数据处理和分析平台,其主要是为应用型企业提供底层技术。Zilliz 研发的 GPU 加速的 AI 数据中台解决方案 Mega,其中包括数据 ETL 系统 MegaETL、数据库系统 MegaWise、面向 Hadoop 生态的模型训练系 MegaLearning 和特征向量检索系统 Milvus,可满足传统的加速数据 ETL、加速数据仓库和加速数据分析的场景和需求,面向各类新兴的 AI 应用场景,已被全球 1,000 多个企业使用,涵盖金融、电信、安防、智慧城市和电子商务等行业。 +
+ +
+ +| ![image023](/image/commercialization/chapter_2/2-23.png) | +|-------------------------------------------------------| + +
+ +
图 2.23 Zilliz 全球用户(来源于公司官网)
+
+ +Zilliz 的成功代表着基于 GPU 的大数据加速器为企业日益增长的数据分析需求提供了有效解决方案。Zilliz 的核心项目向量相似度搜索引擎 Milvus 是全球首款 GPU 加速海量特征向量匹配和检索引擎。Milvus 依托 GPU 加速,提供极速特征向量匹配以及多维度数据联合查询(特征、标签、图片、视频、文本和语音等联合查询)功能,并且支持自动分库分表和多副本,能对接 TensorFlow、PyTorch 和 MxNet 等 AI 模型,可实现百亿特征向量的秒级查询。Milvus 于 2019 年 10 月在 GitHub 上开源,Stars 数量持续高速增长,2023 年 12 月达到 25k+,拥有超过 200 位贡献者和 4000 + 用户的开发者社区。资本市场上,Zilliz 在 B 轮获 4,300 万美金,成为全球开源基础软件最大单笔 B 轮融资,表明了投资机构对 Zilliz 未来发展潜力的看好。 +
+ +
+ +| ![image024](/image/commercialization/chapter_2/2-24.png) | +|-------------------------------------------------------| + +
+ +
图 2.24 Zilliz Github 社区运营情况
+
+ +Zilliz 的主要产品是向量数据库,是开发者工具中的关键一环,这种专门用于存储、索引和查询嵌入向量的数据库系统,可以让大模型更高效率的存储和读取知识库,并且以更低的成本进行模型微调,还将进一步在 AI Native 应用的演进中扮演重要作用。 + +Zilliz 的商业化产品为 Zilliz Cloud,采用月度订阅模式,采用 SaaS 的部署方式,基于向量数量、向量维度、计算单元(CU) 类型、数据平均长度,来确定每月的订阅费用。Zilliz 同时也提供基于 PaaS 的专有部署服务,适用于高度注重数据隐私和合规的场景,这一部分为定制化计价。 +
+ +
+ +| ![image025](/image/commercialization/chapter_2/2-25.png) | +|-------------------------------------------------------| + +
+ +
图 2.25 Zilliz 价格计算器示例
+
+ +### 2.4 AI 应用层开源工具百花齐放 + +#### 2.4.1 应用层开源工具百花齐放 + +应用层人工智能的发展正如百花齐放之景,展现了技术多样性和应用广泛性的壮观图景。当下,应用层 AI 的影响力不断扩大,它们有的面向 C 端用户,提供涵盖日常生活方方面面的服务,如娱乐、社交、音乐、个人健康助理等等;同时也在更专业 B 端领域发挥着重要作用,如市场分析、法务处理、智能设计等。这些应用展现了 AI 技术的深度和广度,不仅提高了效率和便利性,还在很大程度上推动了创新和科技进步。 +
+ +
+ +| ![image026](/image/commercialization/chapter_2/2-26.png) | +|-------------------------------------------------------| + +
+ +
图 2.26 百花齐放的 AI 应用层产品(信息源:Sequoia)
+
+ +大量开源应用层产品也随之诞生,这些应用层产品多是基于大模型底座、结合行业特定数据集进行微调得到。相比于通用大模型,针对行业定制的应用层工具具有更好的性能,开源的特性也有助于使用这些应用的 B 端、C 端用户进行进一步的定制化开发,以更加符合需求。 + +应用层的开源工具促进了跨学科和跨行业的融合。例如,医学、金融、教育和零售等行业都在利用开源 AI 工具来解决行业特有的问题,推动了技术在各个领域的应用。由于成本低和风险小,开源工具鼓励了实验和创新。开发者可以自由地试验新的想法和技术,这种实验精神极大地推动了应用层的繁荣。 +
+ +
+ +| ![image027](/image/commercialization/chapter_2/2-27.png) | +|-------------------------------------------------------| + +
+ +
图 2.27 应用层开源工具图谱(仅以各领域部分产品举例)
+
+ +#### 2.4.2 应用层开源的驱动因素 + +**开源应用层产品使用门槛低,更易被用户接受** + +应用层开源工具价格较低,更符合国内企业付费意愿低的特点。根据艾瑞咨询的数据,国内企业内部管理流程不够专业,对软件价值认可度低,更愿意为人力付费。厂商需要曲线教化企业,给企业接受产品的缓和期,逐步释放需求端。基于上述背景,开源工具以其低成本特性满足了这些市场的需求,使得企业更愿意尝试和采纳这些工具。对于预算有限的国内企业来说,低成本是一个显著的优势。低成本或无成本的特性使得这些企业能够在不增加财务负担的情况下访问和使用先进的技术工具。 + +同时开源工具的低成本特性鼓励企业进行长期投资。企业可以在不承担重大财务风险的情况下,逐步构建和扩展其技术基础设施。随着企业对于开源产品的理解加深、依赖程度加深,开源产品可以逐步考虑提供增值服务的内容,从而达到长期获客的目的。 + +同时开源产品有利于实现与其它系统的无缝集成,提升用户体验。开源应用层产品的一个显著特点是它们通常具有高度的灵活性和可定制性。允许用户根据自己的具体需求进行修改和调整。这意味着开源产品可以被定制,以更好地适应现有系统和工作流程,从而实现与其他系统的无缝集成。许多开源项目遵循行业标准,这有助于确保不同系统和组件之间的兼容性。标准化促进了不同软件产品之间的互操作性,简化了集成过程,从而提高了整体的用户体验。开源社区通常由来自全球的开发者和用户组成,他们共同努力改进产品并提供支持。这种协作精神不仅促进了产品的持续改进,也为解决集成过程中可能遇到的问题提供了资源。 + +**开源应用层产品可获得来自社区的贡献,促进技术迭代、拓宽适用场景** + +应用层开源可以获得来自社区开发力量的大力支持。由于应用场景更加多样与分散,不同细分场景的需求差异性更大,对应场景的贡献者专业性要求更强。Stable Diffusion(SD)是一款开源的文生图应用,在社区力量的加持下,自发布以来其性能迅速追赶,并在某些方面超过闭源文生图应用 Midjourney。虽然使用 Stable Diffusion 时存在一些不便之处,但用户能够从社区获取成百上千的 LoRA、微调设置和文本嵌入。例如,用户在使用 Stable Diffusion 时发现它处理手部图像的能力有限。对此,社区迅速作出反应,在接下来的几周内就开发出了一个专门针对手部图像问题的 LoRA 修复。这种社区的及时和专业反馈极大地促进了应用层开源工具的快速进步和改进。 + +开源产品由于更低的使用门槛,一经发布,就可能被来自不同行业和背景的用户采用,应用于各种环境和情境。这些应用场景可能远远超出了开发者最初的设计和想象。当产品在这些多样化的场景中被使用时,它们可能展现出新的潜力或需求,揭示了之前未被注意到的使用情景。这可以为产品开发者提供宝贵的洞察,帮助他们理解产品在实际使用中的表现和潜在的改进空间。面对这些新发现的使用情景,开发者有机会进行创新和改进。他们可以根据用户在不同环境中的实际使用经验来增加新功能、优化现有功能或重新设计产品以更好地满足这些需求。这种基于实际使用情况的迭代,是开源产品不断进步的重要驱动力。 + +**应用层开源产品具有的 Product-Led Growth(PLG)模型特征可以促进付费转化** + +PLG 模式主要通过自下而上的销售模式进行获客,产品是整个销售过程的核心。PLG 模式的增长飞轮有三个主要阶段:获客、转化、留存。在这三个阶段中,开源都有着区别于传统商业模式的优势。 + +在获客阶段,开源运营模式降低了获客成本,并且使获客流程更具针对性。开发人员的相互交流、GitHub 等平台带来的社区型协作,加速了传播获客。开源产品的初始客户定位通常为开源社区的参与者,他们往往是企业里的开发者或者 IT 人员。培育了这些优质潜在客户,也就具备了 “群众基础”。社区帮助打开企业的边界,让好的开源项目和产品的口碑传播得以可能。使用者为了解决自身问题和痛点,自发地进行下载使用。此时开源软件产品不仅仅是作为通过功能解决用户问题的一个方式,也可以成为帮助企业去传播和增长的一个载体。长期来看,就可以降低企业的获客成本,让自动化的获客越来越多,降低销售方面的费用支出。 + +在转化阶段,相比较传统商业软件,开源软件往往拥有更高的付费转化率。一方面,当用户使用过免费版的软件后,只要软件的功能可以很好的满足用户需求,就可以以较短周期的速度进行付费转化,并使其成为长期用户。另一方面,企业可以通过观察用户对免费版软件的使用行为,进行有针对性的转换跟进和追加销售,例如,向销售团队提供超出其使用限制并准备付款的客户列表。除了传统的销售转化,还可以通过自助购买路径进行转化(Self-service 
selling),这种转换路径很大程度上降低了销售成本。 + +在留存阶段,开源软件可以使用户规避供应商锁定风险,使其愿意进行长期使用。基于同一个开源项目,其下游可能会出现多个提供相似功能软件的供应商,并且可以以比较小的成本来改变供应商的选择,因此用户可以放心地选择长期使用软件。相反地,当顾客使用闭源产品时,如果在使用一段时间后想要转换使用另一个软件,就需要重新进行硬件、数据等的部署,造成不小的转移成本。因此当用户选择使用闭源软件时,可能会由于软件后期开发情况不满足需求或者转移成本过高,而放弃对软件的继续使用。 +
+ +
+ +| ![image028](/image/commercialization/chapter_2/2-28.png) | +|-------------------------------------------------------| + +
+ +
图 2.28 应用层开源增长飞轮
+
+ +#### 2.4.3 大模型应用层开源的市场现状 + +**互联网巨头与初创企业共同发力** + +在大模型应用层开源市场中,无论是互联网巨头还是初创企业,均有机会参与和竞争。这主要得益于以下几个因素:1)降低的技术门槛。模型层、开发者工具层的开源,降低了技术获取和应用的门槛。初创企业可以利用开源模型和工具,开发出符合特定需求的解决方案,而无需从头开始开发复杂的大模型算法。2)成本效益。开源模型通常无需高昂的许可或 API 费用,这对资金相对有限的中小企业尤其有利。3)创新与灵活性。初创企业通常能够更快速地适应市场变化,并针对特定的细分市场或应用场景进行创新。 + +目前互联网巨头主要以本身底座大模型为基础,在其上延伸出一系列垂类应用。例如阿里的通义千问,近期阿里发布通义千问 2.0,并在此基础之上引申出八大应用:通义听悟 (语音识别)、通义晓蜜(提升客服效率)、通义智文 (理解文本)、通义星尘(个性化角色)、通义灵码(辅助编程)、通义法睿(法律行业)、通义仁心 (医药行业)、通义点金(金融行业)。 + +初创企业主要选择某一细分行业进行深耕,如澜舟科技自研大模型聚焦于营销、金融、文化创意等场景;XrayGPT 聚焦于医学放射图像分析;Finchat 聚焦于金融领域模型等等。云启在今年支持了两个开源的应用层初创项目,分别是辅助编程的工具 TabbyML 和可以实时定制 AI 个人助手的 Realchar,他们都快速地在 Github 上积累了大量用户。 + +**B、C 端的竞争格局不同** + +在大模型应用层开源市场,面向企业 B 端和消费者 C 端的竞争格局存在显著差异: +- **B 端市场**:面向企业的应用通常专注于提高效率、降低成本和增强决策能力。在这一领域,开源大模型可以被用于自动化流程、数据分析、客户服务优化等。这里的竞争更多地集中在技术的实用性和定制化能力上。 +- **C 端市场**:面向消费者的应用则更注重用户体验、交互性和易用性。这包括个性化推荐、虚拟助手、娱乐和社交媒体应用等。C 端市场的竞争更多地体现在创新的用户界面和吸引用户的新功能上。 + +**大量子场景尚属于蓝海市场,未出现明显头部** + +随着技术的发展,市场对于 AI 应用的需求变得越来越细分。例如,在医疗、法律、金融、教育等行业中,每个领域都有其独特的需求和挑战。这些细分市场提供了大量的机遇,但同时也需要针对性的解决方案。目前在这些领域都有一些相关应用出现,但大部分都处于初创阶段,尚未产生头部应用。而且由于模型的细分行业众多,竞争不甚激烈,因此是一个较好的入局机会。在这些蓝海市场中,由于市场新颖且不断发展变化,尚未形成明显的市场龙头。这为新进入者和创新者提供了机会,他们可以通过独特的解决方案或创新的业务模式来占据市场份额。 + +**基于大模型新的能力,期待创新性的应用出现** + +尽管大模型技术已经取得了显著进展,但其在特定应用领域的深度整合和创新应用还在初级阶段。这意味着在许多子场景中,还有大量的空间需要探索和实现新的应用方式。随着大型人工智能模型的快速发展,我们正迎来一个充满潜力和创新的新时代。这些模型不仅将优化和改进现有的技术应用,更重要的是,它们将成为引领全新市场和应用领域的先锋。在这个充满未知和惊喜的未来,我们可以期待出现种类繁多、功能强大的新应用,它们将以前所未有的方式融入我们的日常生活。这些新兴的市场和应用将打开一扇窗,让我们窥见前所未见的可能性,带来深远的社会和文化变革。它们将激发人类的创造力和想象力,推动我们突破现有的技术边界,探索更广阔的世界。 + +在这个充满活力和创新的时代,我们将见证技术与日常生活的无缝融合,体验到智能化带来的便捷和效率。人类与机器的协同合作,将打开新的合作和创新模式,引领我们走向一个更智能、更高效、更个性化的未来。这是一个充满期待的时刻,每一步技术的进步都在为我们打造一个更加精彩、丰富和多元的世界。在这个新时代,我们将共同见证和创造前所未有的奇迹,一起探索科技与人类共同发展的无限可能。 + +### 2.5 大模型开源商业化面临的挑战 + +#### 2.5.1 技术高速发展,开源项目需要持续迭代以保持竞争力 + 
+在人工智能和大模型领域,技术的发展速度极快,新的算法、数据处理技术、优化方法和计算架构不断涌现。对于开源项目而言,这意味着需要不断地更新和升级,以保持技术的先进性和有效性。这种持续更新的需求对资源和时间都是一种挑战。对于开源项目来说,特别是那些资金和人力资源相对有限的项目,要跟上这种快速的技术迭代步伐有一定挑战。这意味着他们不仅要与时间赛跑,还要面对来自商业公司和其他开源项目的激烈竞争。如果一个项目无法及时更新以反映最新的技术进展,它可能很快就会变得过时,从而失去用户和社区成员的兴趣和支持。 + +面对来自一些科技巨头如 OpenAI、阿里等有充足资金的公司,一些中小型公司花费大量成本开发的大模型可能会很快被超越,从而导致严重的资金缺口。对于大厂商可以采取 “烧钱” 的战略,而中小型公司则无力支撑,这有可能打击目前百花争鸣的大模型市场,降低其多元性。 + +#### 2.5.2 抄袭 / 借鉴范围难以界定 + +开源大模型的初衷是让更多的用户接触和使用大模型,但是在使用过程中经常会就代码归属权、许可证等很多问题产生争议。由于大模型开源是一个较新的概念,相关法律法规制度不完善,很多还涉及跨国界的问题,因此关于大模型是抄袭还是借鉴,没有一个清晰的定义边界。近期零一万物有关 LLaMA 的 “套壳争议” 问题引发了广泛的关注。舆论持不同观点但没有最终的统一判断,其核心便在于抄袭 / 借鉴范围难以界定。 + +有些观点认为,零一万物的软件使用 Llama 的源代码却不标来源,让别人看起来这部分内容是他们自己开发的,确实涉嫌侵犯署名权,也就是涉嫌抄袭。但也有观点认为零一万物研发大模型的结构设计基于成熟结构,借鉴了行业顶尖水平的公开成果,由于大模型技术发展还在非常初期,与行业主流保持一致的结构,更有利于整体的适配与未来的迭代。同时零一万物团队对模型和训练的理解做了大量工作,也在持续探索模型结构层面本质上的突破。 + +在大模型技术尚处于起步阶段,法律法规尚不完善的背景下,这种辨识变得更加复杂。我们应认识到,随着技术的不断演进和法律体系的完善,如何平衡保护创新与促进合作的关系,将是一个需要持续探讨和完善的过程。最终,这不仅是一个法律和技术的问题,更是关乎整个行业健康发展的伦理与道德议题。 + +#### 2.5.3 社区参与者难以对模型迭代提供直接贡献 + +在构建和迭代大型人工智能模型的过程中,生态社区的参与者面临一个显著挑战:由于模型训练的复杂性,他们往往难以为模型的发展做出直接贡献。这些大模型,如 LLaMA 或其他先进的机器学习模型,通常需要高度专业的技术知识和资源,包括大规模的数据处理能力、深入的算法理解以及昂贵的硬件资源。对于普通社区成员来说,这些要求往往超出了他们的能力范围。 + +因此,尽管社区成员可能充满热情并愿意参与,但他们对模型进行实质性迭代方面的能力受到限制。这种专业能力的缺乏意味着,即使是最活跃的社区成员,也可能只能在模型的应用、反馈收集或初级调试等相对边缘的领域发挥作用。这种局限性不仅影响了社区对模型发展的贡献程度,也可能导致模型开发过程中社区参与感和归属感的减弱。因此,寻找合适的方式使更广泛的社区参与者能够有效地贡献其智慧和努力,是大模型发展中的一个重要课题。 + +#### 2.5.4 开源技术发展快,后期更新成本高 + +开源软件的一个主要优势是降低了用户的初始成本。企业无需支付昂贵的许可费用就可以获得和使用开源大模型。这对于预算有限的小型企业或初创公司尤其有吸引力,因为它们可以利用先进的技术而无需承担重大的财务负担。虽然开源软件在初始阶段节省了成本,但在长期运营过程中,它们可能会带来更高的更新成本。 + +开源项目通常以其创新速度和社区驱动的动态性著称,这促使技术不断进步和演化。然而,随着技术的迅速更新和迭代,维护和升级现有系统的成本也随之增加。这种成本不仅包括直接的财务投入,比如硬件升级或购买新的服务,还包括间接成本,如培训员工以适应新技术,以及将现有系统迁移到更新版本的时间和劳力。特别是对于长期项目而言,持续跟进最新的开源技术变得尤为挑战。每一次重大更新或技术转型都可能涉及复杂的适配工作和兼容性测试,这需要大量的人力和技术资源。此外,频繁的更新可能导致系统稳定性和安全性问题,增加潜在的运营风险。 + +因此,尽管开源技术提供了创新和灵活性的巨大优势,但企业和开发者在采用和维护这些技术时,必须认真考虑到与之相关的更新成本,以及如何在持续创新和成本效益之间找到平衡点。 + 
+虽然开源大模型目前面临着众多挑战,如技术迭代的快速发展、抄袭风险、社区贡献的局限性以及维护成本的增加等,但其未来依然充满希望。开源大模型在推动技术创新、促进知识共享、加速研发流程等方面已经显示出巨大的潜力。为了实现这些潜力并克服当前的挑战,需要来自不同领域和背景的各方共同努力! + +## 三. 开源安全挑战 + +安全问题是决定一款开源产品能否顺利商业化的重要因素。企业用户通常需要对使用产品进行全面的安全评估,以保证整体业务的安全可控,其中包括网络攻击安全、数据安全、商业许可证可控等。 + +根据 Synopsys 数据,截至 2022 年末,84% 的代码库包含至少一个已知的开源漏洞,48% 包含高风险漏洞,34% 的受访者还表示,他们在过去 12 个月内经历过 “利用开源软件已知漏洞发起的攻击”。开源安全问题是一个需要高度关注的问题,它极大程度影响了客户对于开源软件的信任度,以及庞大的开源生态在未来能否行稳致远。只有做好安全保障,开源软件才能在商业化的道路上走得更远。 +
+ +
+ +| ![image029](/image/commercialization/chapter_3/3-1.png) | +|-------------------------------------------------------| + +
+ +
图 3.1 开源代码库漏洞(数据源:Synopsys)
+
+ +### 3.1 开源软件网络安全 + +#### 3.1.1 开源软件安全漏洞会被利用造成严重后果 + +开源软件在推动技术创新和促进知识共享方面发挥了关键作用,但它们也固有地面临安全漏洞的风险。这些安全漏洞的根源通常在于开放性代码的管理和维护问题,例如编程错误、缺乏持续的安全审查,以及对更新和补丁的滞后应用。特别是在项目活跃度不足或缺乏有效监管的情况下,这些漏洞可能长时间未被识别或修复。历史上,由于开源软件的安全漏洞,已经发生了多起严重的安全事件,造成了敏感数据泄露和经济损失。 + +在 2014 年 4 月,被广泛使用的开源组件 OpenSSL 出现了一项重大安全漏洞,被称为心脏滴血(Heartbleed)。这个漏洞自 2012 年 5 月的版本开始就存在,使得攻击者能够获取包含证书私钥、用户名、密码、电子邮箱等敏感信息的数据。由于这个漏洞在长达近两年的时间内未被发现,其造成的影响极其广泛,几乎无法准确估量。再如,在 2021 年 12 月,另一款广泛使用的开源组件 Apache Log4j2 被发现存在一项严重的远程代码执行漏洞,称为 Log4Shell。这个漏洞由于 Apache Log4j2 的高性能和低利用门槛,迅速在全球范围内传播,影响了包括 Steam、Twitter、亚马逊等在内的多家知名公司和服务平台。 + +#### 3.1.2 开源软件网络安全问题相对普遍 + +**开源软件本身安全漏洞较多** + +根据 “2022 年奇安信开源项目检测计划” 结果显示,开源软件整体缺陷密度为 21.06 个 / 千行,高危缺陷密度为 1.29 个 / 千行。连续三年缺陷密度和高危缺陷密度数量不断增长,且有加速的趋势。开源软件十类典型缺陷的总体检出率为 72.3%,而这一数据两年前仅为 56.3%,检出率迅速增长,开源软件自身安全问题相当严峻。 +
+ +
+ +| ![image030](/image/commercialization/chapter_3/3-2.png) | +|-------------------------------------------------------| + +
+ +
图 3.2 开源软件平均缺陷密度三年对比
+
(数据来源:2023 中国软件供应链安全分析报告)
+
+ +从开源软件缺陷漏洞的绝对数量看,根据奇安信统计的数据,截至 2022 年底来自公开漏洞库中收录的开源软件相关漏洞达到 57,610 个,在 2022 年新增漏洞 7,682 个,增量约 15%,这一状况令人担忧。 + +::: info 专家点评 +**余杰**:开源软件的安全问题亟待得到充分的重视,仅凭社区个体的力量显然不足以应对。如何构建有效的体系与制度来全面保障开源软件的安全,成为伴随其高速发展不可回避的重大课题。 +::: + +**活跃度过低 / 过高的开源项目更易存在安全风险** + +开源软件如果活跃度过低,更新频率不足,则会导致不能及时修复出现的漏洞,从而增大软件的风险敞口;若活跃度过高,更新频率过快,也会导致使用者无法及时地相应更新,为安全运维带来较大压力。 + +根据奇安信的数据,若将超过一年未更新版本的开源项目视作不活跃项目,则 2022 年在主流开源软件包系统中不活跃的开源项目为 3,967,204 个,占比达 72.1%,而这一比例在 2021、2020 年分别为 69.9%、61.6%,说明开源作者整体维护积极性有所降低,对于开源软件生态安全的长期发展不利。 +
+ +
+ +| ![image031](/image/commercialization/chapter_3/3-3.png) | +|-------------------------------------------------------| + +
+ +
图 3.3 不活跃开源项目统计
+
+ +在普遍活跃度较低的背景下,也有部分开源软件活跃度过高,同样为使用者带来了很大的安全运维压力。根据奇安信,2022 年主流开源软件包生态系统中更新发布 100 个以上版本的开源项目有 22,403 个,这一数字在 2021、2020 年分别为 19,265、13,411 个。 +
+ +
+ +| ![image032](/image/commercialization/chapter_3/3-4.png)| +| -------- | + +
+ +
图 3.4 极度活跃的项目统计
+
+ +活跃度过低、过高都给开源生态的使用者们带来较高的安全风险,迫切需要一个平衡点,以保证开源软件的健康和持续发展。需要建立更加科学的版本管理和发布机制,确保更新既能及时响应安全和功能需求,又不会过度频繁地打扰用户。对于活跃度不足的项目,可以通过增加社区参与、提供激励机制等方式来提升其活跃度。对于更新频繁的项目,应该更加注重与用户的沟通,提供清晰的更新日志和支持指南,帮助用户更好地理解和适应这些变化。 + +同时,也应该鼓励用户积极参与开源项目的反馈与贡献,形成良性互动。用户的实际使用体验和反馈是调整更新节奏、优化软件功能的重要参考。通过建立健康的用户 - 开发者互动机制,可以有效平衡活跃度和更新频率,确保软件的安全性和可用性。 + +**部分用户使用过于老旧的软件,使用版本混乱** + +根据奇安信的数据,很多软件项目使用的开源软件版本非常老旧,甚至是 30 年前发布的版本,漏洞较多,风险敞口非常大。其中最早的一款软件是在 1995 年发布的 IJG JPEG 6,仍然被很多项目使用。老旧的版本往往伴随着老旧的漏洞,目前部分软件项目中仍然存在很老旧的开源漏洞。最古老的漏洞来自于 2002 年,距今已 21 年,11 个项目依然在使用。 +
+ +
+ +| ![image033](/image/commercialization/chapter_3/3-5.png) | +|-------------------------------------------------------| + +
+ +
图 3.5 古老的开源漏洞及其使用情况
+
+ +开源软件版本使用混乱的状况非常严重,并非都是最新版本。例如 Spring Framework,共有 181 个版本在使用。使用早期版本就会导致大量新版已经被修复的漏洞仍然可以被恶意利用,从而带来很大的安全风险。 + +#### 3.1.3 开源软件漏洞风险的应对策略 + +**定期的安全审计和代码检查** + +需要定义一个清晰的审计流程,包括对软件的整体架构、代码库以及依赖关系的全面审查。组建专门的安全团队来执行这些审计,或者利用第三方安全服务。这些团队或服务提供商应具备深入理解开源软件的能力。 + +同时定期举行代码审查会议,鼓励团队成员相互审查代码,这不仅有助于发现潜在的安全问题,还能提高团队的编程技能和代码质量。审计和代码审查应是一个持续的过程,不断地监控和更新代码库以响应新发现的漏洞和安全威胁。 + + + +**使用 SCA(软件成分分析)工具** + +软件成分分析(SCA)是一种管理开源组件安全的方法,使开发团队能够迅速追踪和分析项目中使用的开源组件。SCA 工具能够识别所有相关的组件和支持库,以及它们之间的直接和间接的依赖关系。此外,它们还能检查软件许可证、识别已弃用的依赖项,并发现潜在的漏洞和威胁。通过 SCA 扫描,会产生一个包含项目软件资产完整清单的物料清单(SBOM)。 + +随着开源组件在软件开发中的广泛使用,SCA 逐渐成为应用安全的关键组成部分,尽管这一概念本身并不新颖。SCA 工具的数量随着其重要性的增加而增多。在包括 DevSecOps 在内的现代软件开发实践中,SCA 不仅需要为开发人员提供易用性,而且还需要在整个软件开发生命周期(SDLC)中引导和指导开发人员安全地开展工作。 + +在使用 SCA 处理开源安全问题时,应着重考虑以下几点: + +- 采用对开发者友好的 SCA 工具:开发人员通常忙于编写和优化代码,他们需要的是能够促进高效思考和快速迭代的工具。不友好的 SCA 工具可能会拖慢开发进程。易于使用的 SCA 工具能够简化设置和操作。这种工具应该能够轻松地与现有的开发工作流程和工具集成,并应尽早在软件开发生命周期(SDLC)中实施。重要的是要让开发人员理解 SCA 的重要性,并将其安全检查流程融入到他们的日常工作中,从而减少因安全问题导致的代码重写。 +- 将 SCA 集成到 CI/CD 流程中:使用 SCA 工具并不意味着会干扰开发、测试和生产流程。相反,企业应将 SCA 扫描集成到持续集成 / 持续部署(CI/CD)流程中,这样可以在软件开发和构建过程中作为一个功能部件,来识别和修复漏洞。这种做法也有助于开发人员将代码安全作为其日常工作流程的一部分。 +- 有效利用报告和物料清单:包括美国联邦政府在内的许多组织在购买软件时都要求提供软件物料清单(SBOM)。提供详细的物料清单意味着,企业认识到跟踪应用程序内每个组件的重要性。清晰的安全扫描和修复报告同样至关重要,它们提供了有关企业安全实践和修复漏洞数量的详细信息,展现了对软件安全的承诺和实际行动。 + +**增强教育和培训** + +对开发人员进行定期的安全意识培训,以提高他们对安全威胁和最佳安全实践的认识,包括教育他们识别常见的安全漏洞和攻击手段。通过实战模拟练习和工作坊,让开发人员在安全的环境中学习如何处理安全事件。这些练习可以包括漏洞挖掘、代码修复和安全测试。 + +鉴于安全领域的快速变化,鼓励开发者持续学习和更新他们的知识,包括参与在线课程、研讨会和行业会议。建立一个平台,如内部论坛或定期会议,让开发人员分享他们在安全方面的知识和经验,以促进团队间的学习和合作。 + +### 3.2 开源许可证的可控 + +#### 3.2.1 开源许可证是一种针对开源资源使用者的约束,类别丰富 + +开源许可证是一种针对开源资源(包括但不限于软件、代码、网页使用者)的约束。基于开源许可证,用户获得对开源资源进行使用、修改、共享等权利。如果软件没有许可证,就意味着保留版权,用户只能查看源码而不能进行使用。因此开源许可证本质上是一种法律许可,可以保护项目贡献者和开源资源用户,保证贡献者能以他们希望的方式开源所拥有的资源,也保证使用者以合理合法的方式使用资源而避免陷入知识产权争端,从而极大促进了开源社区的繁荣。 + +开源许可证根据授权的限制程度整体分为三类:Permissive、Weak Copyleft、Strong Copyleft +
+ +
+ +| ![image034](/image/commercialization/chapter_3/3-6.png) | +|-------------------------------------------------------| + +
+ +
图 3.6 开源许可证分类
+
+ +**Permissive 类别**属于最为宽松的一类许可证,包括 BSD、MIT、Apache、ISC 等,这类许可证提供了极为宽松的授权条件,允许人们自由地使用、更改、复制及传播该软件。它们同样支持将软件用于商业或非商业用途。唯一的要求是,在软件的每份副本中都必须包含相应的许可证文本和版权信息。 + +**Weak Copyleft 类别**是相比于 Permissive 类更严格的许可证,包括 LGPL、MPL 等,这类许可证规定,任何对代码所作的修改都必须在相同的许可证下发布。同时,修改后的代码中必须包含原始代码的授权和版权信息。然而,它们并不强制要求整个项目必须使用相同的许可证进行发布。 + +**Strong Copyleft 类别**是相对更为严格的许可证,包括 GPL、AGPL、CPL 等,这种类型的许可证规定,整个项目必须在相同的许可证下发布,包括那些只使用了软件一部分的情况。此外,这些许可证还要求所有修改过的代码版本必须被公开发布。 + +在这些大类之下,具体的许可证和许可证族都会有独特的限制、权限,附加参数也会有具体差异,许可证整体的逻辑关系整理如下: +
+ +
+ +| ![image035](/image/commercialization/chapter_3/3-7.png) | +|-------------------------------------------------------| + +
+ +
图 3.7 许可证逻辑关系
+
+ +开源社还提供了开源许可证选择器,为更快更好的了解最佳的许可证选择提供了很好的帮助,强烈推荐给有需求的同学:https://kaiyuanshe.cn/tool/license-filter + +#### 3.2.2 使用开源资源不遵守许可证会产生侵权风险 + +**开源许可证侵权** + +“开源许可证侵权” 是指在使用开源软件时,未遵守与该软件相关联的开源许可证的条款和条件,从而违反了许可证规定的法律约束。这种行为可能导致一系列法律和道德上的问题。开源软件虽然是免费提供给公众使用和修改的,但这种使用和修改仍受到一定限制,这些限制由相应的开源许可证明确规定。 + +其具体情况包含但不限于以下几点: + +版权声明和署名的忽略:许多开源许可证要求在复制、分发或修改软件时必须保留原有的版权声明和作者署名。忽视这一要求,如删除原作者的版权信息或未适当地署名,都被视为侵权行为。 + +源代码的不提供:某些许可证,如 GPL(通用公共许可证),要求在分发软件的同时提供源代码。如果一个基于此类许可证的软件被分发,但未同时提供源代码,这也构成侵权。 + +限制性的使用:一些许可证对软件的使用场景设有限制。例如,某些许可证可能禁止在特定类型的商业活动中使用软件。违反这些限制性条款也属于侵权行为。 + +分发和再授权的条件违反:如 GPL 等强制性开源许可证要求,任何基于 GPL 许可证软件的修改和衍生作品也必须以 GPL 许可证发布。违反这一规定,如私有化 GPL 代码或以非 GPL 许可证发布衍生作品,都会构成侵权。 + +特定条款的违反:除了上述常见情形,还有一些特定的许可证条款可能在特定情况下被违反。这取决于特定许可证的具体要求。 + +**许可证互惠性要求导致开源版权问题范围扩大** + +所谓开源许可的 “互惠性要求” 即衍生作品是否要沿用原作品许可证是指,在软件开源的过程中,包括复制、修改、处理、再发布、展示等,开源许可的条款和条件往往会持续适用。这种许可证的权限和限制可以纵向地延伸到基于原始软件开发的衍生作品和修改版本,甚至横向影响到基于这些开源软件开发的其他软件部分。 + +在众多的开源许可证中,GPL 的互惠性要求最强,相关法律诉讼也最多。其主要原因是:任何基于 GPL 代码修改的衍生软件都需要开源。如果一个软件包含了 GPL 代码,即使只是一部分,这个软件整体通常也需要开源(除非符合特定的例外条款)。如果未将受 GPL 影响的专有软件部分开源,使用者可能会违反 GPL 许可证的义务,从而构成侵权。而且 GPL 的条款极为复杂,包含 17 个条款。它对用户的要求更为严格,一旦违反了这些要求,用户的授权协议即被终止,继续使用 GPL 授权的开源软件则可能构成侵权。 +
+ +
+ +| ![image036](/image/commercialization/chapter_3/3-8.png) | +|-------------------------------------------------------| + +
+ +
图 3.8 GPL 许可证相关诉讼
+
+ +**开源许可证侵权可能会导致严重后果** + +开源许可证一旦侵权被定性,给被告企业、个人带来的损失远不止赔偿一方面,还包括声誉、合作伙伴关系等一系列问题: + +法律诉讼和罚款:在 2017 年,Versata Software 起诉 Ameriprise Financial,称其违反了 Versata 的专利权。虽然这不是纯粹的开源许可证侵权案例,但它涉及到软件许可和版权问题。这起案件最终以和解告终,但涉及到的法律费用和时间成本非常高昂。 + +强制遵守许可证要求:一个著名的案例是 2015 年的 VMware 与 Hellwig 案件。Hellwig,一位 Linux 内核开发者,指控 VMware 在其 ESXi 产品中使用了基于 GPL 的 Linux 代码,但未遵循 GPL 许可证的开源要求。虽然最终法院没有做出对 Hellwig 有利的判决,但这起案件引发了关于 GPL 许可证义务和衍生作品的广泛讨论。 + +声誉损害:Red Hat 在 2004 年对 Speakeasy, Inc. 提起诉讼,指控其未遵守 GPL 许可证的要求。尽管案件和解,但 Speakeasy 的声誉受到了影响,特别是在开源社区中。 + +商业影响:Cisco 在 2008 年因为其 Linksys 产品违反 GPL 许可证而被 Free Software Foundation(FSF)起诉。Cisco 最终同意遵守 GPL 许可证的规定并支付未公开的金额作为捐赠。这起诉讼导致 Cisco 不得不重新考虑其产品的开源策略。 + +合作伙伴关系的破坏:一家公司被发现违反开源许可证,它的商业合作伙伴可能会重新评估与该公司的合作关系,特别是在合作项目涉及开源软件时。 + +#### 3.2.3 开源大模型许可证很大程度上区别于传统许可证 + +由于开源大模型还在发展和迭代,本年度两个影响力极大的开源大模型:LLaMA2 和 Falcon,都因为开源许可证条款的调整而被人质疑是否是真正的 “开源”。二者均未使用市面上通用的许可证协议,而是分别自拟协议“LLAMA 2 COMMUNITY LICENSE AGREEMENT” 以及“TII Falcon LLM License”;同时二者都对其商业用途进行了额外约束。 + +**LLaMA2 的开源许可证区别** + +关于 LLaMA2 违背开源准则的讨论,主要来自于其较为独特的条款: + +- Llama2 开源模型不得用于月活 MAU 大于 7 亿的产品或服务平台,除非获得 Meta 公司的批准和授权; +- Llama2 开源模型不得以任何违反适用法律或法规(包括贸易合规法)的方式使用。同时不适用于除英语以外的语言中使用。 +- 其他大型语言模型(不包括 Llama2 或其衍生作品) + +开源促进会 (Open Source Initiative) 曾发布有关开源的十条定义,是国际目前较为认同的定义,LLaMA2 协议与其中两条产生冲突 + +- 不歧视个人或群体:Llama License 规定月活 7 亿以上的企业用户无法通过本 License 直接获取授权 +- 不歧视领域:许可证不得限制任何人在特定领域使用该程序。Llama License 禁止使用 LLaMA2 的输出结果去改善其他 AI 大模型,这就属于对使用领域的限制。同时 LLaMA2 对于语言的限制也导致了对中文使用领域的限制。 + +**Falcon 的开源许可证区别** + +TII Falcon LLM License 在 Apache License 的基础上做出了一些关键性的修改。Apache License 是一种广受欢迎的开源许可证,它对商业用途具有友好性,允许用户在满足一定条件后,将其修改后的代码作为开源或商业产品发布或销售。 + +Falcon 的许可证与 Apache License 的共同之处在于,它同样提供了对许可作品进行使用、修改和分发的宽泛权限,同时要求在分发过程中包含许可证文本和进行适当归属,此外还包含责任限制和担保的免责声明。 + +然而,TII Falcon LLM License 引入了额外的商业使用条款,要求商业应用在年收入超过 100 万美元时支付 10% 的授权费用。此外,它对发布或分发作品的方式也设置了更多的限制,例如强调必须归属于 “Falcon LLM technology from the Technology Innovation Institute”。 + +**开源大模型的开源目的与传统软件开源不同** + +以 LLaMA2 为例,其许可证本质上是一个指导框架,它主要面向那些打算在遵循 Meta 既定规范和标准的前提下,开发和部署 AI 
系统的企业。此框架的目的是确保这些企业在开发和部署 AI 技术时,能够符合 Meta 设定的特定规则和标准。这样的做法有助于 Meta 管理其 AI 技术的应用范围和方式,进而维护其商业利益和品牌形象。 + +对于那些计划在 Meta 平台上进行 AI 开发的企业而言,LLaMa2 许可证可能构成了一项必须遵守的合规要求。这意味着这些企业在使用 Meta 提供的工具和资源来开发和部署 AI 模型时,必须遵循 Meta 的特定规范和要求。在此过程中,这些企业可能需要向 Meta 申请相应的授权,而 LLaMa2 许可证便是这种授权的一环。 + +#### 3.2.4 保障许可证可控的方式 + +**记录开源组件的使用情况** + +当企业或个人用户的软件达到一定规模后,对于内含的开源组件管理负担会变得较为沉重,从而引发由于不能及时管理而产生的侵权问题。根据 Synopsys,89% 的代码库包含至少已过期四年的开源代码,88% 的代码库包含在过去 2 年内未活动的组件,并且包含非最新版本的组件。很多情况下,开发者可能已经完全忘记了使用过哪些开源组件,在这些开源组件的许可证更新时无法及时做出反应,从而导致侵权问题的发生。因此,通过合理的方式对开源组件进行管理就成为非常必要的事情。 + +开发者可以在项目的文档中手动或自动维护一个详细的依赖清单,列出所有使用的开源组件及其版本信息。例如,在许多编程语言中,可以使用如 requirements.txt(Python)、package.json(Node.js)等文件来追踪依赖。 + +建立内部文档或知识库,记录关于使用的开源组件的所有相关信息,包括它们的来源、许可证信息以及使用方式,并且定时查看其许可证有无更新。在文档中详细追踪在何处使用了哪种开源组件,在代码对应位置添加注释以标明。在文档中添加对应许可证网站,定时查阅以及时发现许可证条款的变动。同时在编程中记录自己如何遵守了有效的许可条件。 + +对于较大体量的开发工作,手动记录的文本也许无法满足项目需求,此时可以使用相关工具,如代码成分分析(SCA)软件。这些工具可以自动识别和记录项目中使用的开源组件。它们通常能够提供详细的报告,包括组件的许可证信息、版本号以及可能的安全漏洞。 + +**谨慎使用辅助编码工具** + +智能编程助手如 ChatGPT 和 GitHub Copilot +通过分析大量的代码库和文档,提供编程建议和代码片段。尽管这些工具在提高编程效率方面极具价值,但在使用它们生成的代码时,需要考虑以下几个关键点来避免潜在的开源许可证侵权问题: + +- 源代码的许可证问题:辅助编程软件可能会根据其训练集中的代码生成建议。这些训练集可能包含来自不同开源项目的代码,而这些项目可能有各种不同的许可证要求。通常辅助编程结果不会索引对应的许可证,如果生成的代码段过于接近原始代码,并且使用者直接复制,可能会涉及到版权问题。 + +- 责任归属:使用由智能编程助手生成的代码时,需要明确,最终责任在于使用者。这意味着开发者应对生成的代码的合法性和适用性负责。因此,开发者定期进行代码审查,特别是对于使用辅助编程生成的部分,确保不违反任何开源许可证条款。 + +**并购过程中进行充分的代码审计** + +在并购过程中进行充分的代码审计是至关重要的,特别是为了避免涉及开源许可证的侵权问题。并购活动通常涉及对目标公司的资产进行全面评估,其中技术资产,尤其是软件资产,占据了重要的位置。在并购审计中需要着重注意下列问题: + +- 识别开源组件:代码审计的一个重要任务是识别目标公司产品中使用的所有开源组件。这包括直接使用的开源库和框架,以及间接依赖的开源软件。了解这些组件及其版本对于评估相关的许可证要求至关重要。 +- 审查许可证合规性:确认开源组件之后,需要对其相应的许可证进行审查。这包括确定这些许可证的类型、限制和义务。特别需要注意的是,某些许可证可能对商业使用有特定限制或要求公开修改后的源代码。 +- 评估风险和责任:在审计过程中,应评估由于未遵守开源许可证可能带来的法律和财务风险。这包括潜在的侵权诉讼、罚款或需要重构依赖于特定开源组件的产品部分。 +- 整合后的合规策略:并购完成后,需要有一个明确的计划来整合目标公司的代码库,并确保继续遵守所有相关的开源许可证要求。这可能涉及到在整个组织内实施新的代码管理和合规性监控流程。 +- 专业法律咨询:由于开源许可证可能非常复杂,获取专业的法律意见是至关重要的。专业律师可以帮助正确解读许可证条款,并提供关于如何处理潜在的许可证冲突的建议。 + +### 3.3 开源 AI 安全 + +随着大模型的火热,在上文提到的大模型许可证问题外, 更多的 AI 
安全可控问题也逐步进入人们的视野。由于技术较新,没有明确的定义和规范,因此本段基于案头研究列举了当下相关从业人员较为关心的话题,希望引发读者思考,欢迎探讨与反馈。 + +#### 3.3.1 开源 AI 对数据安全提出新的要求 + +不同于传统数据安全,由于 AI 大模型的输出结果很大一部分取决于训练的数据集,因此数据集的质量、数据集是否包含恶意数据等问题对于 AI 大模型尤其是开源大模型尤为重要,因为开源大模型的数据集很多都是企业内部提供数据,清洗、监控、合规等无法做得像专业闭源大模型厂商那样专业。 + +**训练数据集处理不恰当会引发一系列偏差** + +数据偏见发生在数据集中的某些元素被过分强调或未得到充分代表。当基于这种带有偏见的数据来训练人工智能或机器学习模型时,可能导致结果出现偏差、不公平和不准确性。 + +* **选择性偏见**:一些面部识别系统,主要基于白人图像训练,对不同种族的面部识别准确率相对较低; +* **排除性偏见**:这种偏见通常在数据预处理阶段出现,如果数据基于刻板印象或错误假设,那么无论采用哪种算法,结果都将产生偏差; +* **观察者偏见**:研究人员可能会有意无意地将个人观点带入研究项目中,从而影响研究结果; +* **种族偏见**:当数据集偏向某个特定群体时,就会产生种族偏见; +* **测量偏见**:当用于训练的数据与实际世界中的数据不一致,或者错误的测量方法导致数据失真时,就会产生这种偏差。 + +这些偏差一旦被恶意使用,可能会导致输出结果产生明显的政治、种族偏向,或者数据错误,从而极大影响大模型的性能和公信力。 + +**选择开源底座大模型时应将训练数据源纳入考量范围** + +很多大模型训练数据源是直接从互联网上通过爬虫工具获得的,互联网上普遍存在着歧视性、仇恨和攻击性的言论和信息。在实际使用中,人们对负面信息的阅读、评论、点赞和传播远超过正面信息。因此,人类生成的信息源长期以来都处于一种较为混乱和不健康的状态。这种环境下,大型模型可能会因受到这些数据的影响,而助长种族歧视和虚假信息的传播。 + +一旦大模型底座的数据源遭到污染,即使企业本身微调使用的数据源很完美,也会导致最终输出的结果产生重大偏误。因此在选择底座大模型时,使用者不应只考虑大模型的性能,而应当将训练数据的来源也纳入考量。应当注重那些以负责任的方式从多元来源处选择标注数据集的大模型,同时将偏见最小化视为整个模型构建过程中甚至部署之后需要重点考虑的因素。 + +#### 3.3.2 开源 AI 大模型的大量使用引发对于社会伦理的思考 + +**大模型幻觉问题可能导致严重后果** + +目前的大模型存在一个尚未解决的问题——幻觉。根据哈工大赛尔实验室,幻觉指 “文本生成任务中,有时会生成不忠实或无意义的文本 "。虽然幻觉文本不忠实并且无意义,但是由于大模型强大的上下文生成能力,这些文本的可读性往往非常高,让读者以为它们是基于提供的上下文,尽管实际上很难找到或验证这种上下文真实存在。这种现象与难以与其他“真实”感知区分的心理幻觉类似,一眼看上去也很难捕捉到幻觉文本。 + +幻觉的种类有很多,并且随着大模型使用范围的扩大还在不断涌现。常见的幻觉主要有以下几种: + +- **逻辑错误**:大模型在推理过程中出现了逻辑上的错误,从而导致输出的内容看似合理,但经不起推敲; +- **捏造事实**:大模型的数据库本身不支持其回答这个问题,但是由于大模型无法对自身的边界进行定义,因此会自信地断言一些根本不存在的事实; +- **数据驱动偏见**:正如上一部分所讲,由于某些数据的普遍存在,模型的输出可能会偏向某些方向,导致错误的结果。 + +大模型幻觉导致的错误输出,可能会使某些对其深信不疑的使用者受到伤害。在 2023 年 5 月 16 日,世界卫生组织发布了对使用大型人工智能语言模型工具的谨慎声明。他们指出,尽管这些工具在获取健康信息方面提供了便利,特别是在资源匮乏的地区可能增强诊断的效率,但使用它们时需要严格评估潜在风险。世界卫生组织进一步强调,如果匆忙使用未经充分测试的系统,可能导致医疗专业人员犯错误,给患者带来伤害,并减少人们对人工智能技术的信任,这可能会损害或推迟这类技术在全球范围内的潜在长期好处和应用。 +
+ +
+ +| ![image037](/image/commercialization/chapter_3/3-9.png) | +|-------------------------------------------------------| + +
+ +
图 3.9 哈尔滨工业大学对于幻觉的分类
+
+ +由于目前对于大模型尚未有明确的责任主体,对于开源大模型更是如此,因此一旦产生严重后果,受到损失的使用者将很难维权,其损失也很难被缓解。目前关于这方面有 2 个急需解决的问题: + +- 如何能更好地解决大模型的幻觉问题——技术层面 +- 如何更清晰地界定大模型的责任主体——法律层面 + +**大模型的输出可能会输出违反道德法律的内容** + +目前部分大模型缺乏内容过滤机制,导致输出的内容存在违反当地法律法规、公序良俗的情况,主要包含以下几种情况: + +版权问题:大型语言模型可能会生成包含或类似于受版权保护材料的内容。例如,模型可能会创建与已存在的文学作品、歌词、电影剧本等相似的文本。这样的生成物可能侵犯原始作者或版权持有者的权利,从而导致法律纠纷; + +地域法规:不同国家和地区有其独特的法律体系。例如,某些国家对于互联网内容的审查更为严格,如对政治敏感内容、宗教信息或性别议题的特定表达有明确禁令。大模型在这些区域运行时,生成的内容必须遵守当地法律。例如,当有人向大型语言模型询问 “野生娃娃鱼的烹饪方法” 时,模型回答了“红烧”,甚至提供了详细步骤。这种回答可能会误导提问者。事实上,野生娃娃鱼是国家二级保护动物,不应被捕捉、杀害或食用。 + +诽谤和错误信息:如果模型生成的内容包含对个人或组织的错误指控或诽谤性言论,可能会导致法律诉讼。这对于确保内容的准确性和合法性提出了高要求。 + +为了确保符合各种法律要求,使用大型语言模型的组织可能需要建立监管机制,比如对生成的内容进行审核,确保其不违反任何法律规定。尤其是企业使用的开源模型,它们相对而言对于内容输出的审查更为宽松,企业需要额外注重相关问题,防止陷入法律纠纷而带来损失。在此同样可以归纳为 2 个问题: + +- 如何加强大模型的信息过滤机制——技术层面 +- 如何界定大模型输出内容是否侵权、违法——法律层面 + +**大模型可能会加剧社会割裂** + +北京计算机学会数字经济专委会秘书长曾表示:对于那些缺乏批判性思考和分析能力、对付费知识和医疗服务了解不足的人群来说,大型语言模型(LLM)潜在的安全问题尤其引人关注。随着互联网用户数量的剧增和移动设备如手机的广泛使用,低教育和低收入人群越来越多地依赖这些途径来获取医疗、教育和日常生活咨询。然而,大型生成式语言模型可能会加剧对这些边缘化群体的歧视性描述和社会偏见,深化社会分裂,增加误导性、恶意信息的危害,并提高个人真实信息被泄露和滥用的风险。 + +大模型的使用就像一把双刃剑,一面可以重新整合网络资源,提高信息的收集效率;一面由于幻觉等问题可能会加剧信息壁垒,导致很多信息来源匮乏的人群受到误导。在这一点上有 2 个需要解决的问题: + +- 加强公众的教育,大模型并非万能,需审慎看待——社会宣传层面 +- 如何保证大模型训练数据集的质量,降低其偏见性——技术层面 + +## 四. 开源项目资本市场情况 + +### 4.1 全球市场状况 + +#### 4.1.1 2023 全球 VC 投资规模减小,但 AIGC 是万众焦点 + +2023 年以来,由于不断增长的利率、严峻的经济形势、地缘冲突、对国际金融体系稳定性的担忧,全球金融市场波动加剧,这也导致全球 VC 资本市场情况不容乐观。据毕马威统计,截至 2023 年 Q3,全球风险投资活动已经连续下降了七个季度(见图 4.1)。 +
+ +
+ +| ![image038](/image/commercialization/chapter_4/4-1.png) | +|-------------------------------------------------------| + +
+ +
图 4.1 全球风险投资活动(数据源:KPMG)
+
+ +在股票市场下跌的背景下,出于保持投资组合比例的需求,基金经理们普遍降低对于私募股权资产的配置;同时由于风险资本本身较大的波动性以及未来全球经济形势的不明朗,2023 年风险资本的募资规模较往年降幅较大。相较于过去五年间(2018-2022)年均 2500 亿美元以上的规模,截至 2023Q3 风险资本承诺投资金额仅有 1106 亿美元(根据毕马威)。叠加连续下降七个季度的风险投资活动的趋势,2023Q4 及全年募资规模将大幅缩水。 +
+ +
+ +| ![image039](/image/commercialization/chapter_4/4-2.png) | +|-------------------------------------------------------| + +
+ +
图 4.2 全球风险资本募资规模(数据源:KPMG)
+
+ +在估值层面,投资者的谨慎情绪也逐渐增强。相比于 2021、2022 年,溢价融资比例减少约 10%,平价、折价融资的比例均上涨 5% 左右,这对于早期资本的退出造成了障碍。 +
+ +
+ +| ![image040](/image/commercialization/chapter_4/4-3.png) | +|-------------------------------------------------------| + +
+ +
图 4.3 全球风险投资溢价、平价、跌价投资比例(数据源:KPMG)
+
+ +然而,在大环境整体不乐观的背景下,AIGC 相关融资却成为全球焦点,相关融资规模大幅增长。在北美,2023 年的独角兽企业中 AI 相关公司最多,包括 AI Agent 初创公司 Imbue、AI + 生物技术公司 TrueBinding、生成式 AI 公司 Runway 以及自然语言处理公司 Cohere;在欧洲,尽管整体融资放缓,但是 AI 公司表现则格外突出,大量初创公司获得资金,例如法国 AI 平台公司 Poolside;亚洲投资人对于 AI 的兴趣也不断攀升,但相关国家监管机构对生成式 AI 的监管力度也在不断加大。 + +预计伴随着 AI 技术的快速迭代,大模型、AI Agent 等概念的不断火热,AI 领域相关投融资会较小受到全球风投规模收缩的影响。 + +#### 4.1.2 全球开源融资情况 + +近年来,商业开源公司的发展速度令人瞩目,这些公司的总市值从 100 亿美元迅速增长,突破了 5,000 亿美元的大关。这一显著增长不仅展示了开源技术在商业领域的巨大潜力,也反映了投资者对于开源模式的高度认可和信任。根据 OSS Capital 的预测,商业开源公司的市值有望在未来达到惊人的 3 万亿美元。 + +在过去四年里,开源商业领域的发展表现出了稳健的增长。这一时期内,超过 400 家初创公司进行了大约 700 轮次的融资,总额达到了 290 亿美元。具体来看,年度融资规模从 2020 年的 2.7 亿美元增加到了 2023 年的 125 亿美元,年复合增长率为 255%。 + +尽管 2022 年融资规模呈现下降趋势,但这种趋势在 2023 年得到了缓解。从 2023 年 2 月开始,融资规模开始逐步回升。2023 年的前 11 个月,融资总额已经超过了 2022 年全年的数额。不过,受到地缘政治冲突和疫情后经济复苏的影响,全年融资规模的波动性有所增加。在 3 月、5 月和 9 月,融资额达到了峰值,约为 20 亿美元左右,而在 6 月和 8 月则低于平均水平。 + +即使在 2023 年融资规模最低的月份,月融资额 3.86 亿美元也超过了 2021 年最高月融资额,甚至超过了 2020 年全年的融资总额(2.72 亿美元)。这一趋势反映出资本市场对开源商业的持续关注和认可。这一明显的融资增长趋势表明,资本市场对开源商业的兴趣和信心不断增强。投资者们看重的不仅是开源模型的创新潜力和技术优势,还有其在市场上的可持续性和长期增长潜力。 +
+ +
+ +| ![image041](/image/commercialization/chapter_4/4-4.png) | +|-------------------------------------------------------| + +
+ +
图 4.4 全球 VC 基金投资到商业化开源软件公司的资金量(数据源:OSS Capital)
+
+ +从各轮次融资规模角度分析,资本更加青睐 B、C、D 等中期融资。这反映出商业开源公司的特征:早期技术细节尚不明确、商业模式不清晰;但当逐步跨越初创阶段后,商业开源公司将爆发出较为强劲的增长动力,吸引更多资本的进入;在后期商业模式逐渐成熟,开源产品打响知名度,产生稳定的现金流,对于融资的需求便有所降低。 +
+ +
+ +| ![image042](/image/commercialization/chapter_4/4-5.png) | +|-------------------------------------------------------| + +
+ +
图 4.5 商业化开源软件公司融资轮次分布(百万美元)(数据源:OSS Capital)
+
+ +在过去的 4 年中,共有 328 家商业开源公司获得了超过 1,000 万美元的融资。其中主要集中于 1,000-5,000 万美元间,在 1,000-2,000 万美元、2,000-5,000 万美元的融资共 210 轮次,占全部轮次的 64%。5,000 万 - 1 亿美元、1 亿 - 2 亿美元的轮次分别有 49、46 次,占全部轮次的 29%。共有 23 家公司获得 2 亿美元以上的融资,其中有 2 家甚至获得了超过 5 亿美元的单笔融资。 +
+ +
+ +| ![image043](/image/commercialization/chapter_4/4-6.png) | +|-------------------------------------------------------| + +
+ +
图 4.6 商业化开源软件公司累计融资规模分布(数据源:OSS Capital)
+
+ +### 4.2 中国市场状况 + +#### 4.2.1 中国股权资本市场发展情况概括 + +**新成立基金数量及规模有所下降,但整体趋势逐渐向好** + +在 2023 年上半年,(PE/VC)市场共成立了 3,930 支新基金,相较于去年同期的 4,456 支新成立基金,减少了 12%。在这一时期,新成立基金的总规模达到了 3,642 亿美元,与去年同比减少了 3%。尽管相比于去年规模和数量有所下降,但二季度的表现比一季度好,整体呈现好转的趋势:具体来看,第一季度新成立基金规模为 1,614 亿美元,同比下降近 20%;而第二季度则录得 2,028 亿美元,同比增长 16%。 +
+ +
+ +| ![image044](/image/commercialization/chapter_4/4-7.png) | +|-------------------------------------------------------| + +
+ +
图 4.7 国内私募股权基金认缴规模与数量(数据源:投中网、KPMG)
+
+ +**人民币基金规模增加,外币基金规模显著减小** + +在 2023 年上半年,新成立的人民币基金数量为 3,840 支,与去年同期相比下降 13%。人民币基金的总规模达到了 3,395 亿美元,相较于去年同期实现了 13% 的增长。外币基金的规模为 247 亿美元,同比大幅下降 67%。尽管 2023 年外币基金的数量有所增加,但由于多数为小额基金,其对总规模的影响较小。 + +这一趋势表明,国内股权投资市场更倾向于人民币基金的投资风格:更加保守,且对被投企业的稳定性要求较高。对于国内的开源商业初创公司来说,仅仅追随市场热点已不足以吸引投资。技术实力和长期增长潜力成为评估是否进行进一步投资的关键因素。 +
+ +
+ +| ![image045](/image/commercialization/chapter_4/4-8.png) | +|-------------------------------------------------------| + +
+ +
图 4.8 国内私募股权人民币基金规模及数量(数据源:KPMG)
+
+ +
+ +| ![image046](/image/commercialization/chapter_4/4-9.png) | +|-------------------------------------------------------| + +
+ +
图 4.9 国内私募股权外币基金规模及数量(数据源:KPMG)
+
+ +**经济复苏不及预期,整体投资数量与规模下降** + +在经济复苏根基不稳、整体需求放缓、外部市场不稳定等宏观背景下,2023 年 H1 股权市场总投资 3,750 笔,同比下降 31%;总投资金额共计 569 亿美元,较去年同期下降 6%,相比于融资侧新成立基金规模下降 3%,在投资侧显示出更强的收缩,进一步说明了投资人的谨慎情绪,这与国际市场所表现出的趋势一致。 +
+ +
+ +| ![image047](/image/commercialization/chapter_4/4-10.png) | +|-------------------------------------------------------| + +
+ +
图 4.10 国内股权市场投资金额及数量(数据源:KPMG)
+
+ +#### 4.2.2 国内开源生态稳步发展 + +**开源产业各方面生态逐渐完善,稳步发展** + +目前国内的开源产业正在经历顶层设计与产业进步并举,人才储备与技术创新并重的发展格局,从法律法规、政策支持、竞赛选拔、产业链各环节各方面共同进步。 + +在法律法规方面,2023 年 11 月 2 日上海对外经贸大学人工智能与变革管理研究院副院长、上海开源信息技术协会秘书长张国锋,在 2023 开源产业生态大会媒体沟通会上表示,上海开源产业的规划和政策在起草和推动中,上海一定要抓住历史性机遇,积极参与数字治理、数字公共产品国际合作(消息来自澎湃新闻);在政策支持角度,在 2023 全球开源技术峰会(GOTC)上,上海市开源产业服务平台正式宣布启动:上海浦东软件园与 Linux 基金会亚太区签约,正式落地 Linux 基金会亚太区开源社区服务中心,并与开源中国签订战略合作协议,共建上海开源生态(消息来自文汇报)。在竞赛选拔方面,我国已经有 “中国软件开源创新大赛”、“OpenHarmony 竞赛训练营” 等一系列开源竞赛,吸引了上海交通大学、复旦大学等国内诸多高校的学生参赛,在竞赛中涌现出大量创新亮点,充分体现了开源生态蓬勃共建的良好势头和巨大潜力。 + +开源产业链各环节都繁荣发展。在人工智能领域,众多公司开源了基座大模型,其中包括阿里巴巴开源了通义千问,幻方量化开源了 DeepSeek 等等。创业公司中百川智能、智谱 AI、零一万物等都分别发布了多款自己训练的基座大模型,值得一提的是,这些公司都被资本市场看好,分别在今年进行了一次或多次高额融资。在开发者工具层,除不少已经深耕的创业项目外,也有新的玩家进入,并且已经有产品在做全球化尝试。可预见未来,在应用层,开源 AI 的应用也有机会迎来更多机会。 + +在底层操作系统领域,大型公司正推动操作系统的国产化进程,其中包括由阿里巴巴开发的 Anolis OS 龙蜥操作系统开源社区和开放原子开源基金会支持的 openEuler 社区等。在云原生、大数据、人工智能、前端技术等多个关键领域,这些大型企业也有显著的开源项目布局。例如,蚂蚁集团的企业级 UI 设计工具 ant-design,百度的深度学习平台 PaddlePaddle,以及数据可视化图表库 Apache Echarts 等,其在 GitHub 社区都拥有广泛的影响力和庞大的用户基础。 + +在大数据和数据库行业中,针对国内外市场产生的庞大且多样化的数据,以及不断增长的数据处理需求,众多初创企业正在积极进行战略布局。例如,PingCAP 推出了分布式关系型数据库 TiDB 和分布式键值数据库 TiKV;涛思数据开发了时序数据库 TDengine;SphereEx 则推出了分布式数据库中间件 ShardingSphere 等。随着人工智能技术的发展,AI 领域也涌现出了创新性的产品,比如 Zilliz 面向 AI 应用开发的向量数据库,以及 Jina.ai 的神经搜索引擎,能够实现全类型内容的搜索。 +
+ +
+ +| ![image048](/image/commercialization/chapter_4/4-11.png) | +|-------------------------------------------------------| + +
+ +
图 4.11 国内 AI 相关科技企业开源项目及开源企业图谱(部分)
+
+ +**魔搭已成国内大模型开源第一门户,标志着中国开源 AI 社区建设逐步成长** + +魔搭社区是阿里达摩院联手中国计算机学会(CCF)开源发展委员会推出的 AI 模型社区, 旨在打造下一代开源的模型即服务共享平台,致力降低 AI 应用门槛。自推出以来,规模迅速扩大:目前社区已有超过 2,300 个模型,开发者超过 280 万,模型下载次数超过 1 亿次。百川智能、智谱 AI、上海人工智能实验室、IDEA 研究院等头部大模型机构都将魔搭作为其开源模型首发平台。 + +魔搭社区秉持 “模型即服务” 理念,把 AI 模型当作生产的重要元素,从模型预训练到二次调优,最后到模型部署,围绕模型的生命周期来提供相应的服务。相比于国外社区 Hugging Face,魔搭更关注本土需求,提供大量中文模型,更能推动国内相关 AI 场景的落地应用。 +
+ +
+ +| ![image049](/image/commercialization/chapter_4/4-12.png) | +|-------------------------------------------------------| + +
+ +
图 4.12 截至目前,魔搭社区已经拥有包含 LLM、零样本学习等 11 个模型大类
+
+ +魔搭社区的成立与快速发展为中国开源社区文化树立了标杆,有利于进一步推动开源文化在国内的传播,吸引更多有创造力、有开源精神的技术创造者、技术使用者的加入,促进中国开源事业的进一步繁荣。 + +#### 4.2.3 国内开源公司融资保持热度 + +2023 年市场热度保持,多起大额投资进行,部分初创公司一年内多次融资,反映出投资人的兴趣高涨。开源中国为一家开源社区平台公司,收录全球知名开源项目近 10 万款,旗下有开源社区 Landscape 以及日本老牌开源社区 OSDN,同时拥有代码托管平台 Gitee,为目前国内规模领先的代码托管服务平台,获得了 7.75 亿元的 B + 轮战略融资;飞轮科技研发和推广开源实时数据仓库 Apache Doris,为 Apache Doris 用户提供技术支持商业服务,获得新一轮数亿元融资,截至目前总融资规模已近 10 亿元;澜舟科技以 NLP 技术为基础,提供新一代认知智能平台,完成 Pre-A + 轮投资,在不到一年时间内,总融资规模达到数亿元。 + +目前中国开源生态发展仍处于较早期阶段,2023 年融资事件主要集中在 B 轮及以前,涉及人工智能、开源社区、数据仓库与大模型平台等领域,市场机会广阔。 + +
+表 4.1 国内开源软件初创公司投融资情况(右滑查看完整内容) +
+
+(Github 数据统计截至 2023 年 12 月 7 日) +
+ + +|**公司** | **开源项目** | **公司业务** | **最新一轮融资轮次** | **最新一轮融资金额** | **最新一轮融资时间** | **GitHub Star** | **GitHub Fork** | +|--------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------|--------------|--------------|--------------|------------------|------------------| +| **支流科技** | Apache APISIX | 微服务 API 网关 | A + 轮 | 数百万美元 | 2021 年 6 月 | 10.8k | 2k | +| **白鲸开源** | Apache DolphinScheduler | 云原生 DataOps 平台 | Pre-A 轮 | 数千万元 | 2022 年 7 月 | 9.4k | 3.5k | +| **飞轮科技** | Apache Doris | 云原生实时数仓 | Pre-A 轮 | 数亿元 | 2023 年 6 月 | 6.5k | 1.9k | +| **偶数科技** | Apache HAWQ | Hadoop SQL 分析引擎 | B + 轮 | 近 2 亿元 | 2021 年 8 月 | 672 | 324 | +| **天谋科技** | Apache IoTDB | 时序数据库系统 | 天使轮 | 近亿元 | 2022 年 6 月 | 2.8k | 750 | +| **跬智信息技术** | Apache Kylin | 大数据联机分析处理引擎 | D 轮 | 7000 万美元 | 2021 年 4 月 | 3.4k | 1.5k | +| **StreamNative** | Apache Pulsar | 分布式消息队列 | A + 轮 | - | 2023 年 | 12k | 3.2k | +| **SphereEx** | Apache ShardingSphere | 分布式数据库可插拔生态 | Pre-A 轮 | 近千万美元 | 2022 年 1 月 | 17.7k | 6.1k | +| **安托盟丘(AutoMQ)** | automq-for-rocketmq automq-for-kafka | 流存储软件和消息队列 | 天使轮 + | 数千万人民币 | 2023 年 11 月 | 195 | 34 | +| **智谱 AI** | ChatGLM | 大语言模型 | B++++ | 12 亿人民币 | 2023 年 9 月 | 36.3k | 4.9k | +| **潞晨科技** | Colossal-AI | 高性能企业级 AI 解决方案 | 天使轮 | 600 万美元 | 2022 年 9 月 | 6.8k | 637 | +| **Chatopera** | cskefu | 多渠道智能客服系统 | 天使轮 | 数百万元 | 2018 年 8 月 | 2.2k | 742 | +| **数变科技** | Databend | 云数仓 | 天使轮 | 数百万美元 | 2021 年 8 月 | 4.8k | 500 | +| **Dify.AI** | Dify | LLMOps 平台 | 出资设立 | 未披露 | 2023 年 3 月 | 11.8k | 1596 | +| **映云科技** | EMQX | MQTT 消息中间件 | B 轮 | 1.5 亿元 | 2020 年 12 月 | 10.8k | 1.9k | +| **TensorChord** | Envd | MLOps | 种子轮 | 数百万美元 | 2022 年 11 月 | 1.3k | 102 | +| **燧炻科技** | FydeOS | 基于 Chromium 的操作系统 | Pre-A 轮 | 数千万元 | 2022 年 2 月 | 1.5k | 192 | +| **泛化智能** | GAAS | 无人机自主飞行方案 | - | 未披露 | 2018 年 10 月 | 1.7k | 411 | +| 
**GeekCode** | Geekcode.cloud | 云开发环境 | 种子轮 | 数百万人民币 | 2022 年 4 月 | 42 | 2 | +| **Gitee** | git | Git 代码托管 | B + 轮 | 7.75 亿 | 2023 年 7 月 | - | - | +| **极狐** | GitLab | DevOps 工具平台 | A++ 轮 | 数千万元 | 2022 年 9 月 | - | - | +| **白海科技** | IDP | AI 数据开发平台 | 种子轮 | 数千万元 | 2021 年 12 月 | 17 | 3 | +| **艾拉云科** | illa-builder | 低代码开发平台 | 天使轮 | 数百万美元 | 2022 年 9 月 | 2.3k | 126 | +| **极纳科技** | Jina | 多模态神经网络搜索框架 | A 轮 | 3000 万美元 | 2021 年 11 月 | 16.8k | 2k | +| **Juicedata** | JuiceFS | 分布式文件系统 | 天使轮 | 数百万元 | 2018 年 10 月 | 7.1k | 605 | +| **谐云科技** | Kingdling | 容器云产品及解决方案 | B + 轮 | 超亿元 | 2022 年 1 月 | 270 | 56 | +| **飞致云** | JumpServer | 云计算及 DevOps | D + 轮 | 1 亿元 | 2022 年 4 月 | 19.5k | 4.8k | +| **才云科技** | Kubernetes | 容器云平台 | 并购 - 字节 | 未披露 | 2020 年 7 月 | 94.1k | 34.5k | +| **泽拓科技** | Kunlun | 分布式数据库 | 天使轮 | 数千万元 | 2021 年 8 月 | 112 | 15 | +| **深之度科技** | LinuxDeepin | Linux 操作系统 | B 轮 | 数千万元 | 2015 年 4 月 | 413 | 70 | +| **矩阵起源** | Matrixone | 数据智能 | 天使 + 轮 | 数千万美元 | 2021 年 10 月 | 1.3k | 212 | +| **澜舟科技** | Mengzi | 大语言模型 | Pre-A+ 轮 | 数亿元人民币 | 2023 年 3 月 | 530 | 61 | +| **Zilliz** | milvus | 向量搜索引擎 | B + 轮 | 6000 万美元 | 2022 年 8 月 | 14.4k | 1.9k | +| **欧若数网** | Nebula | 分布式图数据库 | Pre-A + 轮 | 近千万美元 | 2020 年 11 月 | 8.3k | 926 | +| **悦数科技** | NebulaGraph | 分布式图数据库 | A 轮 | 数千万美元 | 2022 年 9 月 | 9.7k | 1.1k | +| **一流科技** | oneflow | 深度学习框架 | 并购 - 美团 | - | 2023 年 | 4.1k | 478 | +| **面壁智能** | OpenBMB | 大模型应用 | 种子轮 | 未披露 | 2021 年 8 月 | 359 | 49 | +| **易捷行云** | OpenStack | IaaS | E 轮 | 未披露 | 2021 年 7 月 | 4.6k | 1.6k | +| **原语科技** | PrimiHub | 隐私计算 | 天使轮 + | 千万级 | 2022 年 10 月 | 263 | 60 | +| **好雨科技** | Rainbond | 企业应用云操作系统 | Pre-A 轮 | 数百万元 | 2016 年 8 月 | 3.6k | 664 | +| **快用云科** | QuickTable | 无代码数据建模工具 | - | 未披露 | 2021 年 8 月 | 7 | 3 | +| **睿赛德科技** | RT-Thread | 物联网操作系统 | - | 未披露 | 2020 年 1 月 | 7.6k | 4.2k | +| **巨杉数据库** | SequoiaDB | 分布式关系型数据库 | D 轮 | 数亿元 | 2020 年 10 月 | 305 | 115 | +| **边无际科技** | Shifu | 物联网软件开发框架 | A 轮 | 未披露 | 2022 年 6 月 | 205 | 21 | +| **鼎石纵横** | StarRocks | MPP 分析型数据库 | B 轮 | 
未披露 | 2022 年 1 月 | 3.6k | 793 | +| **石原子科技** | StoneDB | 实时 HTAP 数据库 | 天使轮 | 数千万元 | 2022 年 2 月 | 639 | 100 | +| **TabbyML** | TabbyML | 开源 AI 编程助手 | 种子轮 | 未披露 | 2023 年 7 月 | 13.9k | 515 | +| **太极图形** | Taichi | 数字内容创作基础设施 | A 轮 | 5000 万美元 | 2022 年 2 月 | 21.7k | 2.1k | +| **钛铂数据** | Tapdata | 实时数据服务平台 | Pre-A + 轮 | 数千万美元 | 2021 年 7 月 | 223 | 52 | +| **涛思数据** | TDengine | 时序空间大数据引擎 | B 轮 | 4700 万美元 | 2021 年 5 月 | 20.1k | 4.6k | +| **PingCAP** | TiDB | 分布式数据库 | E 轮 | 未披露 | 2021 年 7 月 | 32.9k | 5.3k | +| **数字天堂** | uni-app | Vue 语法的统一前端框架 | B + 轮 | 未披露 | 2018 年 9 月 | 37.4k | 3.4k | +| **灵奥科技** | Vanus | 大模型中间件 | 种子轮 | 数百万美元 | 2023 年 7 月 | 2.2k | 110 | +| **未来速度** | Xorbits | 分布式数据科学计算框架 | 天使轮 | 数百万美元 | 2023 年 2 月 | 933 | 58 | +| **乐维软件** | Zabbix | IT 运维管理 | A 轮 | 未披露 | 2022 年 11 月 | 2.6k | 766 | +| **KodeRover** | Zadig | 云原生软件交付云 | Pre-A 轮 | 数千万元 | 2021 年 8 月 | 1.8k | 636 | +| **易软天创** | zentaopms | Agile 项目管理 | A 轮 | 数千万元 | 2021 年 10 月 | 946 | 275 | +| **云轴信息** | ZStack | IaaS | - | 未披露 | 2021 年 3 月 | 1.2k | 380 | + + +
+ +
+表 4.2 国内开源大模型初创公司投融资情况(右滑查看完整内容) +
+
+(Hugging Face 数据统计截至 2023 年 12 月 7 日) +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
公司名 最近融资轮次 最近融资时间 最近融资量 模型介绍 模型名 likesdownload
百川智能 A 轮 2023-10-17 00:00:003 亿美元 在知识问答、文本创作领域表现突出 Baichuan-7B795102k
Baichuan-13B-Chat6128.29k
Baichuan2-13B-Chat321133k
智谱 AIB+++++ 轮 2023-09-19 00:00:0012 亿人民币 多模态理解、工具调用、代码解释、逻辑推理 ChatGLM-6B2.67k56.8k
ChatGLM2-6B1.91k97.7k
ChatGLM3-6B501104k
元语智能 出资设立 2022-11-24 00:00:00 功能型对话大模型 ChatYuan-large-v2171669
ChatYuan-large-v1108120
ChatYuan-7B93
面壁智能 天使轮 2023-04-14 00:00:00 数千万人民币 大语言模型,包括文字填空、文本生成、问答 cpm-bee-10b15819
cpm-ant-10b2212.6k
cpm-bee-1b127
澜舟科技 Pre-A + 轮 2023-03-14 00:00:00 数亿人民币 处理多语言、多模态数据,文本理解、文本生成 mengzi-t5-base411.42k
mengzi-bert-base321.46k
mengzi-t5-base-mt1744
虎博科技 A 轮 2019-03-01 00:00:003300 万美元 多语言任务大模型,覆盖生成、开放问答、编程、画图、翻译、头脑风暴等 15 大类能力 tigerbot-70b-chat-v2401.68k
tigerbot-180b-research3312
tigerbot-70b-base-v1153.25k
深势科技 C 轮 2023-08-18 00:00:00 超 7 亿人民币 高精度蛋白质结构预测模型 Uni-Fold-Data6
三维分子预训练模型 Uni-Mol-Data3
元象 XVERSEA + 轮 2022-03-11 00:00:001.2 亿美元 大语言模型,具备认知、规划、推理和记忆能力 XVERSE-13B11742
XVERSE-13B-Chat42412
XVERSE-65B356.18k
零一万物 天使轮 2023-11-06 00:00:00 通用型 LLM,其次是图像、语音、视频等多模态能力。Yi-34B1.07k109k
Yi-6B30326.7k
Yi-34B-200K1074.55k
+ diff --git a/data.md b/data.md new file mode 100644 index 0000000..f50fe94 --- /dev/null +++ b/data.md @@ -0,0 +1,1519 @@ +--- +outline: deep +--- + +# 数据篇 + +## 概述 + +2023 中国开源年度报告以深入全面的数据洞察为基础,共分为八大部分。第一部分**总体宏观洞篇**,通过对基础事件、活跃仓库、活跃用户、开源许可证和编程语言等方面的深入分析,揭示中国在全球开源生态中的全貌。第二部分 **OpenRank 排行榜篇**,提供了全球和中国各领域开源项目、企业、基金会、开发者以及协作机器人的排名,为业界提供全面系统的 OpenRank 指标信息服务。第三和第四部分为**企业洞察篇**和**基金会洞察篇**,通过演变图和趋势分析,呈现了全球和中国企业、基金会在开源领域中的演化。第五部分**技术领域洞察篇**,深入研究了各领域 Top10 榜单和项目变化情况,展示了前沿技术的发展方向和趋势。第六部分**开源项目洞察篇**,深入探讨了不同项目类型、领域和主题的多样性和创新方向。第七部分**开发者洞察篇**,则通过对开发者类型、工作时间分布、地区分布和机器人使用情况的分析,展现了开发者群体的多样性和工作特征。第八部分**案例分析篇**,通过一系列有趣的案例分析,从一个侧面让读者一窥中国开源生态的蓬勃发展。整体而言,数据篇通过丰富多彩的数据洞察与分析,勾勒出中国开源生态在 2023 年的全景图。 + +### 指标介绍 + +**OpenRank** + +OpenRank 指标是由 X-lab 开放实验室所研发,基于开源开发者-项目协作关系网络构建的协作网络指标,不仅能够很好的表征项目的整体发展状态、社区参与度,同时也引入了开源生态的要素,能够很好地将开源生态中位于关键协作位置的项目、人、组织等实体识别并展示出来。OpenRank 目前已经得到了工业界和学术界的广泛认同,被包括中国标准化研究院系列开源治理标准、信通院开源治理白皮书、开放原子开源基金会全球开源大屏、企业开源办公室治理工具箱等所广泛采纳。 + +关于该指标的定义请参考: + +[1] [Shengyu Zhao et al: OpenRank Leaderboard: Motivating Open Source Collaborations Through Social Network Evaluation in Alibaba. ICSE, 2024](https://www.researchgate.net/publication/376686121_OpenRank_Leaderboard_Motivating_Open_Source_Collaborations_Through_Social_Network_Evaluation_in_Alibaba) + +[2] [赵生宇: 如何评价一个开源项目(三)价值流网络, 2021](https://blog.frankzhao.cn/how_to_measure_open_source_3) + +[3] 工业和信息化部标准化研究院: 《信息技术 开源治理 第3部分:社区治理与运营》[T/CESA 1270.3-2023]、《信息技术 开源治理 第 5 部分:开源贡献者评价模型》[T/CESA 1270.5-2023], 2023 + + +**活跃度(Activity)** + +活跃度是 X-lab 研发的评价项目或开发者活跃程度的统计性指标。开发者活跃度由开发者 Issue、PR 及代码 Review 等行为加权得到。项目活跃度由项目中所有开发者活跃度总和进行数值处理后得到。 + +关于该指标的定义请参考: + +[1] [Xiaoya Xia et al: Exploring activity and contributors on GitHub: Who, what, when, and where. APSEC, 2023](https://ieeexplore.ieee.org/abstract/document/10043221) + +[2] [赵生宇:如何评价一个开源项目(一)——活跃度,2021](https://blog.frankzhao.cn/how_to_measure_open_source_1) + +### + +## 一. 
总体宏观洞察 + +### 1.1 基础事件 + +**基础事件**是本数据篇分析的数据基础,是指全球化开源协作平台上(如 GitHub、Gitee 等)由于开发者活动行为所产生的一系列事件日志数据。对基础事件的统计分析,可以宏观洞察全球开源生态发展的态势。本次开源年度报告所涉及的开源协作平台包括 GitHub、Gitee 以及 GitLink。 + +#### 1.1.1 GitHub 全域事件趋势 + +首先,统计分析全域 GitHub 的事件日志总体数量,如下图所示。 + + +![1-1](/image/data/chapter_1/1-1.png) + +
图 1.1 GitHub 年度事件数趋势
+
+ +可以看到近几年全球开源的总体活跃情况和活跃仓库数量都在明显上升,显示了全球开源发展中的增速。2023 年 GitHub 日志数据达到了 14 亿,相比 2022 年增长了约 10.32%。在经过了 2018-2020 的高增长后,GitHub 平台的年度事件增长数量逐渐下降,2023 年的增长率为 10% 左右。但由于整体体量的关系,10% 的增长率还是一个非常大的数字,继续凸显出开源科技的发展在全球数字化转型中的活跃与关键地位。 + +#### 1.1.2 GitHub 和 Gitee 的总事件数趋势比较 + +由于 GitHub 平台活跃事件数量庞大,接下来的分析工作,建立在每个平台的前 3 万个活跃仓库的基准之上。为了方便比较,我们选取了 GitHub 与 Gitee 中与开源参与相关性较大的 8 类事件进行统计分析,包括 CommitCommentEvent、ForkEvent、IssueCommentEvent、IssuesEvent、PullRequestEvent、PullRequestReviewCommentEvent、PushEvent 和 WatchEvent。 + +![1-2](/image/data/chapter_1/1-2.png) + + +
图 1.2 GitHub 与 Gitee 活跃仓库事件数
+
+ +发现 Gitee 平台呈现更为显著的增长趋势。甚至从 2021 年起,前 3 万个活跃仓库的事件数量超越了 GitHub,凸显了国内活跃开源项目的爆发态势。反映了国内开发者积极参与和贡献到开源社区的热情,为技术创新和知识共享注入了新的活力。 + +然而,需要强调的是,单纯依靠前 3 万个活跃项目的数据无法完全揭示全球 GitHub 平台的真实情况,因为长尾效应在全球范围内仍然非常明显。这一点将在后续的分析中更为清晰地体现出来,特别是 GitHub 平台作为全球领先的开源社区的广泛和多样性。在未来,随着技术的不断演进和开源文化的推动,可以期待中国开源社区在全球范围内继续蓬勃发展。 + +再进一步,分析基础事件的细分领域数据,结果如下图所示。 + +![1-3](/image/data/chapter_1/1-3.png) + + +
图 1.3 GitHub 与 Gitee 活跃仓库事件类型对比
+
+ +从分析结果可以看到: + +- 在 GitHub 平台上,最多的事件类型是 Push 事件,Pull Request 事件和 Issue Comment 事件分别居二三席。其中各个事件的发生次数占比基本上没有太大的变化,这体现出 GitHub 的开源生态模式在走向一个稳定的趋势。 +- 在 Gitee 平台上,事件数据在 2018 年有极大的增长,最初以 Watch 事件为主。但在 2020 年后,Pull Request、 Review Comment 事件开始快速增长,在 2022 年成为最多的事件类型,并且在 2023 年也有持续的增长。Gitee 事件数据的结构性变化,体现出国内开发者从关注者到贡献者角色的巨大转变,这和全球范围内的观察都是一致的。 + +#### 1.1.3 GitLink 事件数分析 + +对于 GitLink 平台,我们同样选择了前 3 万个活跃仓库作为基准。鉴于数据的局限性,仅选取了包括 CommitCommentEvent、ForkEvent、IssueCommentEvent、IssuesEvent、PullRequestEvent 和 WatchEvent 六种事件类型的数据进行分析。 + +![1-17](/image/data/chapter_1/1-17.png) + +
图 1.4 GitLink 平台事件数据分析
+
+ +可以观察到,尽管 GitLink 的活跃仓库事件数相较于 GitHub、Gitee 等平台仍存在一定差距,但同样呈现明显的增长趋势。在 GitLink 平台上,Issue 事件和 CommitComment 事件占活跃仓库事件的绝大多数。 + + +### 1.2 活跃仓库 + +#### 1.2.1 GitHub 全域活跃仓库数目趋势 + +统计分析了全域 GitHub 活跃仓库的数量信息,如下图所示。 + +![1-4](/image/data/chapter_1/1-4.png) + +
图 1.5 GitHub 年度活跃仓库数量趋势
+
+ +从 2023 年总体数据来说,全域活跃仓库数量达到了 8,792 万,比上一年增长了 4.06%,这和总体事件趋势一样、并在经过了 2018~2020 年的高增长后,开始逐年下降。这有可能和疫情以及全球经济发展的影响有关。 + +由于 GitHub 和 Gitee 仓库数量的差距,接下来的分析工作,同样建立在每个平台前 3 万个活跃仓库的基础之上。 + + +#### 1.2.2 GitHub 和 Gitee 总体活跃仓库活跃度趋势与对比 + +统计分析 GitHub 和 Gitee 总体活跃仓库活跃度趋势,如下图所示。 + +![1-5](/image/data/chapter_1/1-5.png) + + + +
图 1.6 GitHub 与 Gitee 活跃仓库活跃度对比
+
+ +从每个平台前 3 万个活跃仓库的活跃度数据来看,Gitee 平台的总体活跃度从 2019 年开始迅速增长,并在 2022 年的时候甚至超越了 GitHub,且继续保持着高增长的趋势,揭示了这一时期中国开源发展的巨大活力。 + +![1-6](/image/data/chapter_1/1-6.png) + +
图 1.7 GitHub 与 Gitee 活跃仓库活跃度组成对比
+
+ +进一步,从细分的活跃度组成分析可以看到: + +在 GitHub 平台中,创建 PR 事件的占比接近总活跃度事件的一半,合并 PR 事件接近四分之一。审查 PR 事件占 10%,issue 创建和评论事件接近,占 7%。 + +在 Gitee 平台中,审查 PR 事件最多,占了总活跃度事件的三分之二,合并 PR 事件和 GitHub 平台一样,排在次席,且占比也比较接近。让人感到惊讶的数据是,在 GitHub 平台里占比最高的创建 PR 事件,在 Gitee 平台里是占比最少的,只占了总活跃度事件的 2%。 + +#### 1.2.3 GitHub 和 Gitee 总体活跃仓库 OpenRank 趋势与对比 + +统计分析 GitHub 和 Gitee 总体活跃仓库 OpenRank 趋势,如下图所示。 + +![1-7](/image/data/chapter_1/1-7.png) + +
图 1.8 GitHub 与 Gitee 活跃仓库 OpenRank 对比
+
+ +然而,从活跃仓库的总体 OpenRank 趋势分析可以看到,虽然 Gitee 的前 3 万仓库的活跃度在 2022 年一度超越了 GitHub,但 OpenRank 的影响力差距还是较大(大约为 5 : 2)。不仅差距较大,从趋势上看,也还没有拉近的迹象,这一点尤其值得关注,也是中国开源接下来的重点发展方向。 + +### 1.3 活跃用户 + +#### 1.3.1 GitHub 总体活跃用户数量趋势 + +统计分析 GitHub 总体活跃用户数量,如下图所示。 + +![1-8](/image/data/chapter_1/1-8.png) + +
图 1.9 GitHub 年度活跃用户数量趋势
+
+ +2023 年总体来说,全域活跃开发者数量达到了 2,193 万,比上一年增长了 8.88%。和 GitHub 活跃仓库数据一样,在经过了近五年的高增长后,增长率在 2020 年开始逐年下降, GitHub 平台的活跃用户增长开始放缓(虽然 GitHub 官方曾在 2023 年初的时候,宣布其平台整体用户数量突破一亿),这和全球局势的变化、以及中国像 Gitee 这样平台的崛起,也有一定的关系。 + +#### 1.3.2 活跃用户地理分布与排名 + +得益于 2023 年度 OpenDigger 开源软件生态数据分析挖掘平台挑战赛([OpenSODA](https://github.com/ECNU/OpenSODA))获奖作品的贡献,本次年度报告能够包括详细的 GitHub 开发者地理位置数据分析内容。 + +以下分析基于 GitHub 上 1000 万个活跃开发者展开。其中正确填写地理位置信息的开发者总量约 200 万人,按照 GitHub 全域注册用户数 1 亿计算,采样比例约为 2%。 + +**1、全球开发者地域分布** + +首先统计分析全球开发者的地域分布,如下图表所示。 + +![1-9](/image/data/chapter_1/1-9.png) + +
图 1.10 全球开发者地域分布
+
+ +
表1.1 全球开发者国家/地区人数分布(Top 15)
+
+ +| 排名 | 国家 | 总人数 | 占比 | 年度活跃数 | 活跃率 | +|:----:|:--------------:|:--------:|:------:|:---------------------:|:------:| +| 1 | United States | 408983 | 21.09% | 236899 | 57.92% | +| 2 | India | 177669 | 9.16% | 107066 | 60.26% | +| 3 | China | 171039 | 8.82% | 126238 | 73.81% | +| 4 | Brazil | 114855 | 5.92% | 83932 | 73.08% | +| 5 | Germany | 88767 | 4.58% | 64836 | 73.04% | +| 6 | United Kingdom | 83245 | 4.29% | 55175 | 66.28% | +| 7 | Canada | 65241 | 3.36% | 42238 | 64.74% | +| 8 | France | 57480 | 2.96% | 40341 | 70.18% | +| 9 | Russia | 47213 | 2.43% | 31534 | 66.79% | +| 10 | Australia | 31638 | 1.63% | 20512 | 64.83% | +| 11 | Poland | 31469 | 1.62% | 21792 | 69.25% | +| 12 | Japan | 30873 | 1.59% | 21942 | 71.07% | +| 13 | Netherlands | 30617 | 1.58% | 21685 | 70.83% | +| 14 | Spain | 28928 | 1.49% | 19509 | 67.44% | +| 15 | South Korea | 28325 | 1.46% | 21811 | 77.00% | + + +总体而言,各国的开发者都在不断增加: +- 美国凭借其在开源领域的先发、以及科技人才优势,位居第一; +- 按照表中美国总人数(40.9万)进行折算,实际 GitHub 上的美国开发者总人数大约在 2101 万左右,和 GitHub 公布的官方数据(2200 万)偏差大约在 4%; +- 印度、中国和巴西以其庞大的人口基数,开发者数量分别位居二三四位,但从活跃率(年度活跃数/总人数)可以看到,中国是前四名中最高的; +- 欧洲各国的开源开发者也是一股不小的力量,联合起来体量将上升到第二; +- 根据 GitHub 和 Gitee 公布的官方数据(均是 1200 万左右),中国的全球开源开发者总数将很有可能超过 2000 万,仅从数量上来说,这大约和美国相当。 + +**2、中国开发者地域分布** + +进一步分析,统计中国开发者的地域分布数,如下图表所示。其中,数据来源为“中国”的用户中,正确填写省份信息的开发者,样本数量近 15 万人。 + +![1-10](/image/data/chapter_1/1-10.png) + +
图 1.11 中国开发者地域分布
+
+ +根据 GitHub 2023 年 Q3 季度的数据,中国开发者总量约为 1188 万,依据比例可以估计各省实际开发者总量。 + + +
表1.2 中国开发者人数分布(Top 15)
+ +
+ +| 排名 | 省份 | 总人数 | 全国占比 | 实际总量 | +|:----:|:----:|:--------:|:--------:|:--------------:| +| 1 | 北京 | 32982 | 22.04% | 262.25 万 | +| 2 | 上海 | 24581 | 16.43% | 195.45 万 | +| 3 | 广东 | 21684 | 14.49% | 172.41 万 | +| 4 | 浙江 | 14256 | 9.53% | 113.35 万 | +| 5 | 台湾 | 12173 | 8.13% | 96.79 万 | +| 6 | 江苏 | 7335 | 4.90% | 58.32 万 | +| 7 | 四川 | 7012 | 4.69% | 55.75 万 | +| 8 | 香港 | 4678 | 3.13% | 37.19 万 | +| 9 | 湖北 | 4415 | 2.95% | 35.1 万 | +| 10 | 陕西 | 2815 | 1.88% | 22.38 万 | +| 11 | 福建 | 2405 | 1.61% | 19.12 万 | +| 12 | 山东 | 2035 | 1.36% | 16.18 万 | +| 13 | 湖南 | 1858 | 1.24% | 14.77 万 | +| 14 | 重庆 | 1833 | 1.22% | 14.57 万 | +| 15 | 安徽 | 1487 | 0.99% | 11.82 万 | + + + +上表中的排名和数据揭示了中国开源开发者和地区经济发展水平的相关性: +- 北、上、广、浙四大城市的开源开发者人数均超过了百万级,北京优势尤为明显; +- 台湾和香港分别位居第五和第八,凸显了港澳台地区的重要性; +- 长三角地区(江浙沪皖)的开源开发者体量达到了近 380 万; +- 中西部地区如四川、湖北、陕西等也有不错的表现,尤其是四川,凭借其宜居与快速发展的软件行业,吸引了大量的开发者。 + + +### 1.4 开源许可证 + +#### 1.4.1 使用开源许可证的仓库数量 + +统计了 GitHub 的活跃仓库采用的开源许可证的数量,如下图所示。 + +
+1-11 +
+ + +
图 1.12 使用开源许可证的仓库数量占比
+
+ +分析发现目前使用最多的开源许可证,包括 MIT 许可证、Apache 许可证 v2.0、GNU 通用公共许可证 v3.0、BSD 3-Clause 许可证。其中 MIT 许可证以接近 60% 的占比排名第一。MIT 许可证以麻省理工学院(Massachusetts Institute of Technology)为名,最早由该学院使用,因此得名。MIT 许可证的简洁和灵活性使其成为许多开发者选择的许可证之一,它提供了最小的法律限制,鼓励开发者自由地使用和传播软件。 + +#### 1.4.2 开源许可证种类变化趋势 + +统计分析了开源许可证种类变化趋势,如下图所示。 + +![1-12](/image/data/chapter_1/1-12.png) + +
图 1.13 开源许可证种类数量变化趋势
+
+ +总体来看,开源许可证的种类在 2017 年以来不断增加。Eclipse 公共许可证 2.0 和欧盟公共许可证 1.2 以及其他许可证的推出造成了 2017-2018 年的增长。在此之后开源许可证种类的增长速度放缓,在 2021 年至 2022 年间,一批新的开源许可证如木兰系列许可证、CERN(欧洲核子研究组织)许可证v2 开始崭露头角,随后发展趋于稳定,目前 GitHub 上主流许可证的种类也持续两年稳定在 46 个。 + +#### 1.4.3 使用开源许可证仓库数量变化趋势 + +根据日志数据显示,2023 年有接近 770 万个活跃仓库使用了各种开源许可证(占全体活跃仓库的 8.76%),其中由于 MIT 许可证强大的影响力,我们将其数据单独展示。 + +**1、使用 MIT 许可证仓库数量变化趋势** + +统计分析了 MIT 许可证仓库数量变化趋势,如下图所示。 + +![1-13](/image/data/chapter_1/1-13.png) + +
图 1.14 使用 MIT 许可证的仓库数量变化趋势
+
+ +可以看到: +- MIT 许可证是目前最流行的开源许可证,2023 年有 158 万个活跃仓库使用了该许可证; +- 使用 MIT 许可证的仓库情况和总仓库数量情况变化趋势类似,都有较大增长,但是在 2022 年和 2023 年的增长速度有所减缓,这和整体项目增长趋势放缓有关。 + +**2、其余前五的开源许可证数量变化趋势** + +统计分析了其他前五开源许可证仓库数量变化趋势,如下图所示。 + +![1-14](/image/data/chapter_1/1-14.png) + +
图 1.15 使用其他许可证的仓库数量变化趋势
+
+ +可以看到: +- 各类开源许可证的数量都在增长,但是开源许可证种类的头牌依旧还是以 MIT、Apache、GNU 等为主; +- 小众开源许可证和热门开源许可证的差异仍然存在; +- 从 2022 年开始,GNU 通用许可证 v2 和 v3 整体呈下降趋势; +- GNU Affero 通用许可证 v3 使用数量逐年上升。 + +#### 1.4.4 使用木兰系列许可证仓库数量变化趋势 + +统计分析使用木兰系列许可证仓库数量的变化趋势,如下图所示。 + +![1-15](/image/data/chapter_1/1-15.png) + +
图 1.16 使用木兰系列许可证的活跃仓库数量累加图
+
+ +木兰系列许可证(包含 “木兰宽松许可证” 和“木兰公共许可证”等),均由北京大学作为牵头单位,依托全国信标委云计算标准工作组和中国开源云联盟,联合开源生态圈(如开源社)及产学研团队和个体、尤其是开源法务和律师,起草、修订并发布。其中 Mulan PSL 是国内首个被 OSI 认定的 “开源软件协议”。 + +我们观测了 GitHub 中使用木兰许可证的活跃仓库(其中,活跃仓库是指仓库里有 issue 和 PR 或者有被用户标星等活动)的趋势,从 2022 年 9 月开始,使用木兰许可证的仓库开始增长。截至 2023 年 12 月,已有 220 个活跃仓库使用木兰许可证,木兰开源许可证的影响力在逐渐展现。 + +### 1.5 编程语言 + +#### 1.5.1 2023 年开发者使用编程语言榜单 + +编程语言的受欢迎程度也是开发者所喜闻乐见的,分析了 2023 年度最受开发者欢迎的编程语言,如下表所示。 + +
表 1.3 开发者使用编程语言排行榜(Top 20)
+
+ +| 排名 | 编程语言 | 使用该语言开发者数 | 使用该语言仓库数 | +|:-------:|:----------------:|:------------------:|:------------------:| +| 1 | JavaScript | 765589 | 1806477 | +| 2 | Python | 629423 | 653025 | +| 3 | HTML | 564121 | 676364 | +| 4 | TypeScript | 462729 | 886453 | +| 5 | Java | 368795 | 463660 | +| 6 | CSS | 190480 | 239187 | +| 7 | C++ | 177905 | 135330 | +| 8 | C# | 158159 | 180537 | +| 9 | Go | 143433 | 165367 | +| 10 | PHP | 128186 | 272980 | +| 11 | Jupyter Notebook | 122475 | 102708 | +| 12 | Shell | 122456 | 108209 | +| 13 | C | 107918 | 80159 | +| 14 | Rust | 69370 | 72778 | +| 15 | Ruby | 66857 | 374835 | +| 16 | Kotlin | 64307 | 62709 | +| 17 | Vue | 56099 | 170639 | +| 18 | SCSS | 50526 | 44672 | +| 19 | Dart | 46143 | 43006 | +| 20 | Swift | 33839 | 35978 | + +从上表中可以看出:开发者使用人数前五名的开发语言分别为 JavaScript、Python、HTML、TypeScript、Java,是开发者使用的主要编程语言,而从第 6 名的 CSS 开始,使用人数相较于第 5 名的 Java 降低了接近一半。 + +#### 1.5.2 2019-2023年开发者使用编程语言趋势 + +统计分析了开发者使用编程语言的趋势,如下图所示。 + +![1-16](/image/data/chapter_1/1-16.png) + +
图 1.17 2019 - 2023 年开发者使用编程语言趋势
+
+ +从上图中可以看出: +- JavaScript、Python、HTML、TypeScript、Java五种编程语言是开发者使用的主要编程语言; +- Python、TypeScript相对于其他的三个主要语言增长迅速,并且近5年内一直保持着快速增长的趋势; +- 其中TypeScript近5年来使用人数飞速增长,在2021年与排在其后的编程语言拉开了显著差距,成为了开发者使用的主要编程语言之一,预计2024年其开发者使用数将与排名第3的HTML编程语言的开发者使用数相当。 + +## 二. OpenRank 排行榜 + +**排行榜**是一种大家喜闻乐见的分析结果展现形式,2023 中国开源年度报告将排行榜独立出来,作为一个单独的部分集中展示,一则是为了更好的展示开源生态中各主体(仓库/项目、国家地区、企业、基金会、开发者等)的发展态势,另一个重要原因是因为 OpenRank 指标的逐渐成熟与全域数据的完整性。由于今年同时加入了 GitHub 与 Gitee 的全球数据,使得我们能够站在一个以中国开源为出发点的全球性视角,让全世界看到中国企业、基金会、开发者等主体为发展全球开源生态所做的共同努力与贡献,这是目前市面上其他报告所不具备的。 + +### 2.1 全球开源仓库 OpenRank 排名 + +![2-1](/image/data/chapter_2/2-1.png) + +
图 2.1 全球开源项目 OpenRank 排名(Top 20)
+ +### 2.2 中国开源项目 OpenRank 排名 + +![2-2](/image/data/chapter_2/2-2.png) + +
图 2.2 中国开源项目 OpenRank 排名(Top 20)
+
+ +> 中国开源项目是以 OpenDigger 项目标签数据为准,单项目可能包含多个 GitHub 或 Gitee 平台上的组织或仓库。 + +### 2.3 全球企业 OpenRank 排名 + +![2-3](/image/data/chapter_2/2-3.png) + +
图 2.3 全球企业 OpenRank 排名(Top 20)
+
+ +> 企业排名是以 OpenDigger 项目标签数据为准,含义为某企业发起的所有开源项目 OpenRank 之和,包含已捐赠到基金会的项目。 + +### 2.4 中国企业 OpenRank 排名 + +![2-4](/image/data/chapter_2/2-4.png) + +
图 2.4 中国企业 OpenRank 排名(Top 20)
+ +### 2.5 全球基金会 OpenRank 排名 + +![2-5](/image/data/chapter_2/2-5.png) + +
图 2.5 全球基金会 OpenRank 排名(Top 10)
+ +### 2.6 国家和地区 OpenRank 排名 + +![2-6](/image/data/chapter_2/2-6.png) + +
图 2.6 国家和地区 OpenRank 排名(Top 20)
+
+ +> 国家和地区使用 GitHub 开发者填写的位置信息,采集量为全球 OpenRank 用户前 1000 万名。 + +### 2.7 全球开发者 OpenRank 排名 + +![2-7](/image/data/chapter_2/2-7.png) +
图 2.7 全球开发者 OpenRank 排名(Top 30)
+ +### 2.8 中国开发者 OpenRank 排名 + +![2-8](/image/data/chapter_2/2-8.png) +
图 2.8 中国开发者 OpenRank 排名(Top 30)
+
+ +> 中国开发者账号以 OpenDigger 的标签数据为准。 + +## 三. 企业洞察 + +企业是推动全球开源生态发展的核心力量,既是发起者、还是开发者与维护者,在开源项目发展及其商业化探索的最前沿。 + +### 3.1 近 10 年全球企业 OpenRank 演变图 + +![3-1](/image/data/chapter_3/3-1.png) + +
图 3.1 全球企业 OpenRank 排名变化
+
+ +全球企业开源影响力的观察如下: + +- 微软从十多年前(2008 年)开始布局开源,在 2016 年站到了全球开源影响力的巅峰,直到今天无人撼动; +- 2019 年,华为正式被美国制裁开始,将开源作为重要战略方向,一路飙升,并于今年完成了对 Google 和 Amazon 的超越; +- 阿里巴巴在 2021 年前,一直是国内开源的领先者,并至今稳居世界第六的排名; +- 蚂蚁集团在近三年的表现也是非常抢眼,并于 2023 年正式进入世界前十; +- 国内开源的第四大巨头百度,由于国内快速的开源态势变化,目前排名全球 12; +- 根据 [OpenLeaderboard](https://open-leaderboard.x-lab.info/) 榜单,进入全球前 30 的中国企业还有字节跳动(18)、PingCAP(19)、飞致云(24)、Deepin(25)、腾讯(26)、以及 Espressif(27)。 + +### 3.2 近 10 年中国企业 OpenRank 演变图 + +![3-2](/image/data/chapter_3/3-2.png) + +
图 3.2 中国企业 OpenRank 排名变化
+
+ +本图能够很好表现国内公司的开源战略及其变化趋势: + +- 华为 2019 年开始发力,仅用 2 年的时间就做到了国内第一,全球第二的位置; +- 阿里和蚂蚁作为国内老牌开源引领的企业,表现稳定; +- 百度则由于前面三家的竞争,滑落第四; +- 字节跳动则是近几年肉眼可见的进步神速; +- [Espressif](https://github.com/espressif)(乐鑫科技)是国内开源界相对低调的半导体开源王者; +- Fit2Cloud 则是作为另一家低调但非常务实的开源企业,旗下多款开源软件深受开发者喜爱; +- 腾讯、PingCAP、JD、TAOS 等近两年略有下跌趋势,印证了后疫情时代的开源竞争将更加激烈。 + +### 3.3 中国企业在 GitHub/Gitee 平台上的 OpenRank 占比 + +
+ 3-3 + 3-4 +
+ +
图 3.3 中国企业在全球企业中的 OpenRank 占比(左)和 中美企业在项目维度 OpenRank 上的对比(右)
+
+ +左图显示了中国企业在全球开源生态中的影响力不断上升的趋势,右图则体现了中美两国后贸易战时代的此消彼长的趋势,特别是疫情后,在华为等企业的拉动下,中国开源影响力上升明显;但同样可以看到,中美企业在整体开源影响力之间的差距还是比较明显的(大约 3 倍的差距),但这个势头在未来非常值得期待。 + +## 四. 基金会洞察 + +本部分从基金会这一维度,对开源生态的发展进行分析。**基金会**是非营利型开源组织,在开源项目和开源社区的组织、发展、协同创新中起到了主导作用,为开源软件的孵化提供技术、运营、法律等全方位支持,为开源社区建设和运营提供指导,发挥了孵化器和加速器的作用,是开源生态中重要的组织者。今年,我们将开源基金会的洞察作为单独的一部分,可以看到中国的开源基金会所起到的全球性作用。 + +由于 Linux 基金会已经扩张成了一个超级基金会,旗下的子基金会作为独立一级,这样和其他基金会做对比分析更有意义。 + +### 4.1 全球基金会 OpenRank 趋势分析 + + +
+ + + +
+
图 4.1 全球基金会 OpenRank 整体变化趋势
+
+ +可以看到如下趋势: + +- Apache 基金会排名第一以其成熟稳定的节奏发展,今天仍然是很多企业发展全球化项目的首选; +- 开放原子开源基金会,成立 3 年多以来发展迅速,旗下项目影响力总和超越 Linux 基金会旗下的各子基金,仅次于 Apache 基金会排名第二; +- LF 的 AI & Data 子领域,由于近几年新一代人工智能的迅速发展,超过 CNCF 云原生领域排名第三; +- 其他的(子)基金会项目发展,总体来说均相对稳定。 + +### 4.2 全球基金会项目 OpenRank 趋势分析 + +
+ +
+
图 4.2 全球基金会项目 OpenRank 变化趋势
+
+ +从全球基金会下的开源项目来看: + +- Linux 基金会云原生领域的代表性项目 Kubernetes 继续排名第一,但影响力逐年下降,开始让位于新兴领域的项目; +- Apache 基金会下由百度发起的开源实时数据仓库 Doris,近几年发展迅速,排名第二; +- 开放原子旗下的 OpenHarmony 项目,及其多个子仓库紧随其后,如果合并起来计算,将排名第一。 + +### 4.3 基金会旗下中国项目 OpenRank 趋势分析 + +
+ +
+
图 4.3 中国基金会项目 OpenRank 变化趋势
+
+ +将各基金会旗下的中国项目单独拿出来看: + +- Doris 和 OpenHarmony 发展速度最为抢眼; +- Milvus 向量数据库由于 AIGC 领域的需求,发展迅速; +- Flink、ShardingSphere 等项目稳中有降。 + +### 4.4 开放原子基金会旗下项目 OpenRank 趋势分析 + +
+ + +
+
图 4.4 开放原子基金会项目 OpenRank 变化趋势
+
+ +今年能够第一次看到开放原子旗下各项目的发展情况: + +- 排名前三的分别是 OpenHarmony、openEuler 和 Anolis,代表了操作系统绝对的地位,特别是 OpenHarmony,发展最为迅速; +- 其他上榜项目发展非常平稳,期待后续能够在新的一年中发力。 + +## 五. 技术领域洞察 + +**技术领域**的发展在开源技术中起着风向标的作用,众多技术子领域展现出快速的进步和变革:**操作系统**在新架构中持续演化,**云原生**加速企业的数字化转型,**数据库**成为数据创新的基础设施,**大数据**助力智能决策,**人工智能**加快各行业的自动化实现,**前端**追求交互与美感。这些领域正成为科技前沿,吸引着创新者和投资者,形成蓬勃的发展态势。本节从影响力和活跃度两项指标对该六大领域展开数据洞察。 + +### 5.1 六大技术领域近五年整体发展趋势 + +![5-1](/image/data/chapter_5/5-1.png) + +
图 5.1 各子领域近 5 年 OpenRank 变化趋势
+ +![5-2](/image/data/chapter_5/5-2.png) +
图 5.2 各子领域近 5 年活跃度变化趋势
+
+ +从各子领域近 5 年的变化趋势来看,云原生优势明显,该领域下仓库数量相对其他领域较多,AI 由于这两年的火热增长迅速,数据库作为重要和活跃的基础软件排在前列,操作系统领域虽然总量较低,但影响力依然逐年增加,体现了基础软件的性质,数量虽少,但价值巨大,与前端领域的影响力逐年缩小。 + +### 5.2 各领域 OpenRank 与活跃度 Top 10 项目近五年变化趋势 + +#### 5.2.1 云原生 + +![5-3](/image/data/chapter_5/5-3.png) + +
图 5.3 云原生 OpenRank Top10 项目近五年变化趋势
+ +![5-4](/image/data/chapter_5/5-4.png) + +
图 5.4 云原生活跃度 Top10 项目近五年变化趋势
+
+ +kubernetes 双指标下降明显,Grafana 影响力排名第一。llvm-project 增速明显,活跃度位居第一,llvm 是一个编译器框架,是一系列模块化、可重用的编译器以及工具链技术的集合,从近 3 年来看,该项目活跃度增长迅速,受到广大开发者的喜爱。 + +#### 5.2.2 人工智能 + +![5-5](/image/data/chapter_5/5-5.png) + +
图 5.5 人工智能 OpenRank Top10 项目近五年变化趋势
+ +![5-6](/image/data/chapter_5/5-6.png) + +
图 5.6 人工智能活跃度 Top10 项目近五年变化趋势
+
+ +可以看到 tensorflow 自 2020 年开始双指标严重下滑,掉出前 5,同样作为 AI 开发框架,pytorch 稳步增长,逐步拉开与其他项目的差距,值得一提的是,langchain 项目自 2022 年开源后,双指标便位居第二名,LangChain 是由 Harrison Chase 于 2022 年 10 月推出的开源软件项目。它已成为 LLM 开发中最受欢迎的框架之一。 + +#### 5.2.3 大数据 + +![5-7](/image/data/chapter_5/5-7.png) + +
图 5.7 大数据 OpenRank Top10 项目近五年变化趋势
+ +![5-8](/image/data/chapter_5/5-8.png) + +
图 5.8 大数据活跃度 Top10 项目近五年变化趋势
+
+ +可以看出,大数据领域双指标整体呈现上升趋势,其中,Kibana 和 Grafana 位居影响力和活跃度前二位,并且差距逐渐缩小,预测在未来 Grafana 会超越 Kibana,成为第一名。 + +Kibana 是一个开源的数据可视化和探索工具,它与 ElasticSearch 紧密结合。Kibana 可以用于查询、分析和可视化 ElasticSearch 中的数据。 + +Grafana 是一个开源的数据可视化工具,它可以用于监控和报告。Grafana 支持多种数据源,如 Prometheus、InfluxDB、Graphite 等。它具有强大的数据处理和可视化功能,可以用于创建各种类型的图表和仪表板。 + +#### 5.2.4 数据库 + +![5-9](/image/data/chapter_5/5-9.png) + +
图 5.9 数据库 OpenRank Top10 项目近五年变化趋势
+ +![5-10](/image/data/chapter_5/5-10.png) + +
图 5.10 数据库活跃度 Top10 项目近五年变化趋势
+
+ +ClickHouse 数据库双指标持续稳定增长,ElasticSearch 热度回落,Doris 增速最快,活跃度指标接近第一名,预测该数据库双指标会在未来超越 ClickHouse。 + +ClickHouse 是俄罗斯的搜索公司 Yandex 开源的 MPP 架构的分析引擎,号称比事务数据库快 100-1000 +倍,最大的特色是高性能的向量化执行引擎,而且功能丰富、可靠性高。 + +Apache Doris 是由百度贡献的开源 MPP 分析型数据库产品,分布式架构简洁,易于运维。 + +#### 5.2.5 前端 + +![5-11](/image/data/chapter_5/5-11.png) + +
图 5.11 前端 OpenRank Top 10 项目近五年变化趋势
+ +![5-12](/image/data/chapter_5/5-12.png) + + +
图 5.12 前端活跃度 Top 10 项目近五年变化趋势
+
+ +Flutter 虽然双指标逐年下降,相较于 Next.js 仍具有明显的优势,Next.js 在 2023 年开始发力,上升明显,3-10 名项目竞争激烈,差距不大。 + +Flutter 是由 Google 开发和提供支持的框架。前端和全栈开发人员使用 Flutter 为具备单一代码库的多个平台构建应用程序的用户界面。 + +Next.js 是由 Vercel 创建的开源平台,它使用 Node.js 和 Babel 转译器构建,并设计为与 React 单页应用框架一起使用。另外,Next.js 还提供许多有用的功能,例如预览模式、快速开发者编译和静态导出。 + +#### 5.2.6 操作系统 + +![5-13](/image/data/chapter_5/5-13.png) + +
图 5.13 操作系统 OpenRank Top 10 项目近五年变化趋势
+
+ +![5-14](/image/data/chapter_5/5-14.png) + +
图 5.14 操作系统活跃度 Top 10 项目近五年变化趋势
+
+ +可以看到,OpenHarmony 项目下的多个仓库均处于前 10 的榜单中,本次洞察中结合了 Gitee 平台的数据,可以更直观的看到国产操作系统在各方面所处的优势(OpenHarmony 项目下有多个仓库,本次洞察以仓库的维度进行分析),SerenityOS 从 2021 年开始双指标稍显回落,仅次于 OpenHarmony、OpenEuler 也有着不错的表现。 + +### 5.3 2023 年各领域 OpenRank TOP 10 榜单 + +以下再给出 2023 年各领域的项目 OpenRank 排行榜。 + +#### 5.3.1 云原生 + +
表 5.1 云原生领域项目排行榜 +
+
+ +| 序号 | 项目名称 | OpenRank | +|:---:|:----------------------:|:--------:| +| 1 | grafana/grafana | 7134.37 | +| 2 | llvm/llvm-project | 7049.62 | +| 3 | kubernetes/kubernetes | 5374.14 | +| 4 | ClickHouse/ClickHouse | 4941.99 | +| 5 | cilium/cilium | 3215.42 | +| 6 | ceph/ceph | 3172.49 | +| 7 | keycloak/keycloak | 3095.56 | +| 8 | gravitational/teleport | 3082.18 | +| 9 | envoyproxy/envoy | 2929.08 | +| 10 | backstage/backstage | 2903.39 | + +#### 5.3.2 人工智能 + +
表 5.2 人工智能领域项目排行榜 +
+
+ +| 序号 | 项目名称 | OpenRank | +|:---:|:------------------------------------:|:--------:| +| 1 | pytorch/pytorch | 10182.45 | +| 2 | langchain-ai/langchain | 6080.25 | +| 3 | PaddlePaddle/Paddle | 5408.62 | +| 4 | huggingface/transformers | 4422.84 | +| 5 | AUTOMATIC1111/stable-diffusion-webui | 3881.6 | +| 6 | openvinotoolkit/openvino | 3857.31 | +| 7 | microsoft/onnxruntime | 3006.75 | +| 8 | tensorflow/tensorflow | 2723.26 | +| 9 | Significant-Gravitas/AutoGPT | 2664.85 | +| 10 | ggerganov/llama.cpp | 2339.8 | + +#### 5.3.3 大数据 + +
表 5.3 大数据领域项目排行榜 +
+
+ +| 序号 | 项目名称 | OpenRank | +|:----:|:---------------------:| -------- | +| 1 | elastic/kibana | 7601.04 | +| 2 | grafana/grafana | 7134.37 | +| 3 | ClickHouse/ClickHouse | 4941.99 | +| 4 | airbytehq/airbyte | 4658.86 | +| 5 | apache/doris | 4307.26 | +| 6 | elastic/elasticsearch | 3729.39 | +| 7 | apache/airflow | 3642.9 | +| 8 | StarRocks/starrocks | 3194.56 | +| 9 | trinodb/trino | 2703.4 | +| 10 | apache/spark | 2654.02 | + +#### 5.3.4 数据库 + +
表 5.4 数据库领域项目排行榜 +
+
+ +| 序号 | 项目名称 | OpenRank | +|:---:|:---------------------:|:---------:| +| 1 | ClickHouse/ClickHouse | 4941.99 | +| 2 | apache/doris | 4307.26 | +| 3 | elastic/elasticsearch | 3729.39 | +| 4 | cockroachdb/cockroach | 3443.7 | +| 5 | StarRocks/starrocks | 3194.56 | +| 6 | trinodb/trino | 2703.4 | +| 7 | apache/spark | 2654.02 | +| 8 | pingcap/tidb | 2200.38 | +| 9 | milvus-io/milvus | 2001.11 | +| 10 | yugabyte/yugabyte-db | 1940.75 | + +#### 5.3.5 前端 + +
表 5.5 前端领域项目排行榜 +
+
+ +| 序号 | 项目名称 | OpenRank | +|:---:|:---------------------:|:--------:| +| 1 | flutter/flutter | 9361.81 | +| 2 | vercel/next.js | 6638.65 | +| 3 | appsmithorg/appsmith | 3474.07 | +| 4 | nuxt/nuxt | 3387.23 | +| 5 | facebook/react-native | 3260.55 | +| 6 | ant-design/ant-design | 3053.25 | +| 7 | nodejs/node | 2736.37 | +| 8 | angular/angular | 2273.82 | +| 9 | electron/electron | 1773.31 | +| 10 | denoland/deno | 1654.01 | + +#### 5.3.6 操作系统 + +
表 5.6 操作系统领域项目排行榜 +
+
+ +| 序号 | 项目名称 | OpenRank | +|:----:|:-----------------------------------:|:--------:| +| 1 | openharmony/docs | 3277.69 | +| 2 | openharmony/arkui_ace_engine | 2818.09 | +| 3 | SerenityOS/serenity | 2257.68 | +| 4 | openharmony/graphic_graphic_2d | 1239.6 | +| 5 | openeuler/docs | 1206.9 | +| 6 | openharmony/xts_acts | 1186.06 | +| 7 | openharmony/arkcompiler_ets_runtime | 961.99 | +| 8 | openharmony/interface_sdk-js | 910.91 | +| 9 | reactos/reactos | 745.23 | +| 10 | armbian/build | 679.1 | + +## 六. 开源项目洞察 + +2023 年,AI 大模型领域蓬勃发展,GPT-4、CLIP 等新一代模型崭露头角。全球企业竞相投入研发,推动语言理解、图像生成等前沿技术。行业呈现快速演进和探索的态势,为人工智能的广泛应用打开新篇章。与此同时,数据库领域迎来创新潮流。分布式数据库、时序数据库、图数据库等异彩纷呈,满足不同应用场景需求。云原生数据库蓬勃发展,提供灵活扩展与高可用性。本节从项目类型视角开展数据洞察,并对项目 Topic 进行统计分析,最后分别对数据库与 AI 两个核心领域开展了深入洞察。 + +### 6.1 项目类型 + +本小节选取了 GitHub 活跃度排名前 10,000 的仓库数据进行统计分析。 + +#### 6.1.1 不同项目类型数量比例 + + +6-1 + +
图 6.1 不同项目类型数量比例
+
+ +- 组件框架类型(Libraries and Frameworks)占比最高(31.36%),充分反映了开源协同创新特点(站在巨人的肩膀上),也是最受开发者喜爱、乐于贡献的类型; +- 应用软件类型(Application Software)由于其实用性,占比仅次于组件框架类(24.34%),能够使得所有用户(不仅仅局限于开发者)在各个行业与领域使用开源软件; +- 内容资源类型(Non Software)也有不小的占比(23.17%),这体现了开源这一协同开发模式从软件延伸到全内容领域的趋势,包括文档、教育、艺术、硬件或其他非编程相关领域的创作和贡献; +- 软件工具类型(Software Tools)的占比为 18.9%,对于开发者这个专业群体来说,是个不容忽视的比例,能够使开发者专注于构建实际的应用程序和软件产品; +- 系统软件类型(System Software)占比最少(仅 2.3%),体现了基础软件的性质,数量虽少,但价值巨大,难度也是最高的。 + +#### 6.1.2 不同项目类型 OpenRank 加总比例 + +6-2 + +
+
图 6.2 不同项目类型 OpenRank 加总比例
+
+ +进一步,再从 OpenRank 影响力视角看这几个类别: + +- 最大的变化,就是内容资源类型(Non Software)项目虽然有较高的活跃度,但其影响力相对较低; +- 而系统软件类型(System Software)虽然活跃度占比很少,但其影响力占比相对更多,软件工具类型(Software Tools)项目也有类似的现象; +- 组件框架类型和应用软件类型则没有太多变化,都是属于占比较多的类型。 + +#### 6.1.3 不同项目类型近 5 年 OpenRank 变化趋势 + +6-3 + +
+
图 6.3 不同项目类型近 5 年 OpenRank 变化趋势
+
+ +从上面的五年 OpenRank 演化图上可以看得出来,系统软件类型(System Software)的影响力也是逐年升高、而内容资源类型(Non Software)项目的影响力比例是在相对下降这样一个现象。 + +### 6.2 项目 Topic 分析 + +本节同样选取 GitHub 活跃度排名前 10,000 的仓库数据进行分析,并获取仓库下的 Topic 标签进行深入洞察。 + +#### 6.2.1 热门 topic + +6-4 + +
图 6.4 出现次数前十的 Topic
+
+ +前十的主题涵盖了多个领域,反映了开源社区的广泛兴趣。其中,JavaScript、Hacktoberfest、Python 等主题代表了前沿技术、活跃社区活动和多用途编程语言的热点,突显了对于前端开发、开源贡献和多领域编程的关注。 + +#### 6.2.2 热门 Topic 的仓库总 OpenRank 趋势 + +
6-5
+ +
图 6.5 出现次数前十的 Topic 下仓库的 OpenRank 变化 (2019 - 2023)
+
+ +- Hacktoberfest 是一个每年十月举办的活动,旨在促进开源社区的发展。它由 DigitalOcean 与 GitHub 合作发起,其目标是鼓励更多人参与开源项目,为开源社区做出贡献。它的 OpenRank 高反映了人们对于开源项目、社区参与和贡献的热情。开发者们积极参与这个活动,通过向开源项目提交 Pull Request 的方式贡献,从而帮助提升了这个仓库的声誉和影响力。 +- JavaScript 和 Python:这两项技术在过去几年中保持了相对稳定的趋势,没有出现大幅度的增长或下降。 + +### 6.3 数据库领域项目分析 + +本小节依据 [Database of Databases](https://dbdb.io/) 及 [DB-Engines Ranking](https://db-engines.com/en/ranking) 中公开的开源数据库信息,将该领域按数据库的存储结构及用途划分为 Relational、Key-value、Document、Search Engine、Wide Column、Time Series、Graph、Vector、Object Oriented、Hierarchical、RDF、Array、Event、Spatial、Native XML、Multivalue、Content、 Network 18 个子类,收集并分析其在 GitHub 上的相应数据库开源项目的协作日志数据,详细洞察结果如下: + +#### 6.3.1 数据库领域各子领域 2023 年 OpenRank 和活跃度榜单 + +**1、数据库子领域 OpenRank 榜单** + +
表 6.1 数据库子领域 OpenRank 排行 +
+
+ +| 排名 | 子领域名称 | OpenRank | +|:---:|:---------------:|:--------:| +| 1 | Relational | 58092.36 | +| 2 | Key-value | 21834.08 | +| 3 | Document | 17264.93 | +| 4 | Search Engine | 8093.77 | +| 5 | Wide Column | 7896.43 | +| 6 | Time Series | 7813.54 | +| 7 | Graph | 5196.52 | +| 8 | Vector | 4965.41 | +| 9 | Object Oriented | 3104.07 | +| 10 | Hierarchical | 1355.4 | +| 11 | RDF | 592.68 | +| 12 | Array | 383.95 | +| 13 | Event | 256.59 | +| 14 | Spatial | 224.05 | +| 15 | Native XML | 209.51 | +| 16 | Multivalue | 15.89 | +| 17 | Content | 3.43 | + +**2、数据库子领域活跃度榜单** + +
表 6.2 数据库子领域活跃度排行 +
+
+ +| 排名 | 子领域名称 | activity | +|:---:|:---------------:|:---------:| +| 1 | Relational | 161025.44 | +| 2 | Key-value | 62501.64 | +| 3 | Document | 49400.11 | +| 4 | Search Engine | 23799.87 | +| 5 | Time Series | 22077.57 | +| 6 | Wide Column | 21292.17 | +| 7 | Vector | 16395.88 | +| 8 | Graph | 14947.43 | +| 9 | Object Oriented | 8418.14 | +| 10 | Hierarchical | 3406.55 | +| 11 | RDF | 1701.67 | +| 12 | Array | 1280.14 | +| 13 | Native XML | 737.94 | +| 14 | Spatial | 680.79 | +| 15 | Event | 654.42 | +| 16 | Content | 33.94 | +| 17 | Multivalue | 12.68 | + +从数据库领域各子领域 2023 年的 OpenRank 和活跃度排行可以看出: + +- Relational、Key-value、Document 在以上两项指标中都稳据前三,前三名的数据库子领域的两项指标累计均占数据库领域两项指标的 70% 以上; +- Relational 的各项指标超过了第二至第五名的总和,其两项指标均占数据库领域两项指标的 40% 以上,是一个超大子类。 + +#### 6.3.2 数据库领域各子领域下项目近五年变化趋势 + +![6-6](/image/data/chapter_6/6-6.png) + +
图 6.6 数据库领域各子领域 2019 - 2023 年 OpenRank 变化趋势
+ +![6-7](/image/data/chapter_6/6-7.png) + +
图 6.7 数据库领域各子领域 2019 - 2023 年 Activity 变化趋势
+
+ +从数据库领域各子领域项目近 5 年 OpenRank 变化趋势及活跃度变化趋势上可以看出: + +- 近 5 年,Relational、Key-value、 Document 在以上两项指标中都持续稳据前三; +- Search Engine、 Wide Column、Time Series、 Graph、 Vector、 Object Oriented 则占据了第四到第九名,且两项指标均有逐渐上升的趋势; +- 从上图的排名变化可以看出 Search Engine 和 Vector 的活跃度增速相对较快,Search Engine 的 OpenRank 也提升了两个名次,已经跃居第四大子类,而 Vector 子类还有未爆发的 OpenRank 潜力,其 OpenRank 值已经与 Graph 子类非常接近了;大模型带来的影响目前仍未减退,预计 2024 年 Vector 子类将超过 Graph 子类。 + + +#### 6.3.3 数据库领域各子领域下项目的开源象限图 + +开源象限图共涉及三个度量指标:Activity、Openrank、CommunityVolume。 +其中 CommunityVolume 与 open-digger 中的 Attention 指标计算公式相同,即对一定时间内目标项目的 star 数和 fork 数的加权求和:`sum(1*star+2*fork)`。 + +象限图绘制方法: +1. 把每个数据库子类按 activity 选取 Top 10 的项目; +2. 做出 `log(openrank)-log(communityvolume)` 的 `log(x)-log(y)` 散点图, log 的底数均为 2,分别表示空间影响力 openrank 与时间影响力 communityvolume 衰减到 1 所需的半衰次数; +3. 以图上所有点的横坐标均值所对应的纵向线作为纵轴,以图上所有点的纵坐标均值所对应的横向线作为横轴划分为四个象限。 + +数据库领域子类标签共计18个,选取2023年活跃度占比超过1%的前9个类别统计分析,绘制开源象限图如下: + +
+ +
+ + + +
图 6.8 关系型数据库 OpenRank-CommunityVolume log-log 开源象限图

+ +
+ +
+ +
图 6.9 键值数据库 OpenRank-CommunityVolume log-log 开源象限图

+ + +
+ +
+ +
图 6.10 文档型数据库 OpenRank-CommunityVolume log-log 开源象限图

+ +
+ +
+ +
图 6.11 搜索引擎 OpenRank-CommunityVolume log-log 开源象限图

+ +
+ +
+ +
图 6.12 时序数据库 OpenRank-CommunityVolume log-log 开源象限图

+ +
+ +
+ +
图 6.13 宽列数据库 OpenRank-CommunityVolume log-log 开源象限图

+ +
+ +
+ +
图 6.14 向量数据库 OpenRank-CommunityVolume log-log 开源象限图

+ +
+ +
+
图 6.15 图数据库 OpenRank-CommunityVolume log-log 开源象限图

+ +
+ +
+
图 6.16 面向对象数据库 OpenRank-CommunityVolume log-log 开源象限图

+ +
+ +
+
图 6.17 活跃度 Top 9 子类数据库 OpenRank-CommunityVolume log-log 开源象限图

+ +搜索引擎类两极分化严重,既有像 ElasticSearch 这样 OpenRank 和 CommunityVolume 都很高的项目,又有像 Sphinx 和 Xapian 这样 OpenRank 和 CommunityVolume 都极低的项目。 + +从第一象限看出:relational、document、search_engine、vector 都是 OpenRank 影响力较强且 CommunityVolume 关注度也较强的数据库类型,而 object_oriented 则在两方面相对较弱。 + +从活跃度 Top 9 子类数据库的开源象限图中的纵向分布可以看出:search_engine、vector 两个子类的 CommunityVolume 相较于 OpenRank 更高,有较高的社区声量,相比于其他的子类有较快的发展期望。 + +### 6.4 生成式 AI 领域项目分析 + +本小节参考 [Generative AI Open Source (GenOS) Index](https://www.decibel.vc/articles/launching-the-generative-ai-open-source-genos-index) 中的生成式 AI 领域开源项目,将其分为 tool、model、application 和 infrastructure 四个子类展开分析, 详细洞察结果如下: + +#### 6.4.1 生成式 AI 各子领域近 5 年增长趋势 + +6-8 + + +
图 6.18 生成式 AI 各子领域 2019 - 2023 年 OpenRank 变化趋势
+
+ +6-9 + +
图 6.19 生成式 AI 各子领域 2019 - 2023 年活跃度变化趋势
+
+ +- 根据类别(模型类、工具类、应用类、基础类)划分的分类分析,在活跃度和影响力上总体趋势一致; +- 模型类 AIGC 开源项目的影响力和活跃度均高于工具类和应用类; +- 模型类项目影响力自 2022 年开始增长迅速,在 2023 年超过基础类,整体呈现上升趋势,代表着 2023 年是 AIGC 创新应用开发的大爆发之年,预计后续还会继续走高,从而也同时带动应用类项目的大发展。 + +#### 6.4.2 生成式 AI 领域项目 OpenRank 和活跃度 Top 10 变化趋势 + +6-10 + +
图 6.20 生成式 AI 领域 OpenRank Top 10 项目近 5 年变化趋势
+
+ +6-11 + +
图 6.21 生成式 AI 领域活跃度 Top 10 项目近 5 年变化趋势
+
+ +- langchain 影响力和活跃度双排名第一,备受开发者的关注; +- transformers 作为该领域前几年一直的王者,直到 2023 年才被超越,充分展示了其在 AIGC 领域奠基性的地位,该项目在学术领域与开源领域影响力均表现不俗; +- stable-diffusion-webui 也是备受开发者关注的 AIGC 工具,尤其在活跃度这方面超越 transformers 稳居第二,2024 年影响力超越 transformers 也是大概率事件; +- 部分 AIGC 项目在 2023 年开源后影响力与活跃度就迅速增长并位于 Top 10 榜单,展示了 AIGC 领域这种极端快速的变化特点。 + +#### 6.4.3 2023 年生成式 AI 领域项目 OpenRank 和活跃度 Top 10 榜单 + +**1、生成式 AI 领域 OpenRank Top 10 项目榜单** + +
表 6.3 生成式 AI 领域 OpenRank 排行 +
+
+ +| 排名 | 项目名称 | OpenRank | +|:---:|:------------------------------------:|:--------:| +| 1 | langchain-ai/langchain | 6080.25 | +| 2 | huggingface/transformers | 4422.84 | +| 3 | AUTOMATIC1111/stable-diffusion-webui | 3881.6 | +| 4 | Significant-Gravitas/AutoGPT | 2664.85 | +| 5 | ggerganov/llama.cpp | 2339.8 | +| 6 | oobabooga/text-generation-webui | 2242.5 | +| 7 | milvus-io/milvus | 2001.11 | +| 8 | run-llama/llama_index | 1913.01 | +| 9 | facebookincubator/velox | 1589.53 | +| 10 | invoke-ai/InvokeAI | 1571.45 | + +**2、生成式 AI 领域活跃度 Top 10 项目榜单** + +
表 6.4 生成式 AI 领域活跃度排行 +
+
+ +| 排名 | 项目名称 | activity | +|:---:|:------------------------------------:|:--------:| +| 1 | langchain-ai/langchain | 22563.04 | +| 2 | AUTOMATIC1111/stable-diffusion-webui | 13933.03 | +| 3 | huggingface/transformers | 13618.11 | +| 4 | Significant-Gravitas/AutoGPT | 10961.81 | +| 5 | oobabooga/text-generation-webui | 8597.33 | +| 6 | ggerganov/llama.cpp | 8108.62 | +| 7 | run-llama/llama_index | 7532.47 | +| 8 | milvus-io/milvus | 6488.35 | +| 9 | facebookincubator/velox | 4923.05 | +| 10 | chatchat-space/Langchain-Chatchat | 4477.63 | + +## 七. 开发者洞察 + +**开发者**是开源创新全过程的核心参与方,是开源项目及其衍生产品的生产者和供给方,是开源项目的主要贡献者。开发者的整体数量规模和协作机制对开发者整体的贡献量具有决定性影响。本节以个体开发者为单位开展数据洞察,并在国家与地区层面上做适当的聚合分析。 + +### 7.1 开发者的地区分布 + +和前面 1.3 部分的分析类似,以下分析基于 GitHub 上 1000 万个活跃开发者展开。其中正确填写地理位置信息的开发者总量约 200 万人,按照 GitHub 全域注册用户数 1 亿计算,采样比例约为 2%。 + +**1、GitHub 活跃开发者分布图** + +首先将 GitHub 上活跃开发者数量在地图上进行可视化分析,如下图所示。 + +![7-1.png](/image/data/chapter_7/7-1.png) +
图 7.1 2023 GitHub 活跃开发者分布图
+
+ +地图上可以直观地感受 GitHub 开发者大致的地理位置分布。在人口数量众多且互联网发展较快的地区例如中国沿海地区、欧洲地区、美国、印度、巴西东南沿海地区有着最为密集的 GitHub 开发者分布,在其他居住人口少或者互联网较不发达的地区则相对稀疏。 + +**2、GitHub 活跃开发者国家 / 地区分布图** + + +![7-2.png](/image/data/chapter_7/7-2.png) + +
图 7.2 2023 GitHub 活跃开发者国家 / 地区分布图 +
+
+ +
表 7.1 2023 活跃开发者数量国家 / 地区排名 +
+
+ + +| 排名 | 国家|活跃数量| +|:---:|:---:|:---------------:| +|1|United States | 236899| +|2|China | 113893| +|3|India | 107066| +|4|Brazil | 83932| +|5|Germany | 64836| +|6|United Kingdom | 55175| +|7|Canada |42238| +|8|France |40341| +|9|Russia |31534| +|10|Japan |21942| + + +可以发现美国的人数最多,中国、印度和巴西则紧随其后,其他拥有一定人口数量和经济水平的国家例如加拿大及一些欧洲国家也拥有着较多的 GitHub 开发者。 + +**3、中国活跃 GitHub 开发者分布图** + +将 GitHub 活跃开发者分布数量在地图上进行可视化分析,如下图表所示。 + +![7-4.png](/image/data/chapter_7/7-4.png) + +
图 7.3 2023 中国活跃开发者分布图
+
+ + +
表 7.2 2023 活跃开发者数量中国地区排名 +
+
+ +|排名 | 地区 | 数量 | +|:---:|:---:|:---------------:| +|1|北京|24151| +|2|上海|18215| +|3|广东|16153| +|4|浙江|10927| +|5|台湾|8823| +|6|江苏|5437| +|7|四川|5311| +|8|香港|3344| +|9|湖北|3273| +|10|陕西|1993| + +可以发现北京拥有着国内最多的 GitHub 用户,其次是上海、广东和浙江。中国的大部分 GitHub 活跃用户都在东部沿海地区,中部的一些省份例如陕西、湖南、湖北也拥有不少的活跃用户,值得注意的是四川是除了沿海地区以外拥有最多 GitHub 活跃用户的省份。 + +**4、OpenRank 加权后的 GitHub 中国开发者影响力分布图** + +尝试用各地区开发者的 OpenRank 值做聚合,得到中国开发者影响力分布图与地区排名,如下图表所示。 + +![7-3.png](/image/data/chapter_7/7-3.png) + +
图 7.4 中国开发者 OpenRank 影响力分布图
+
+ +
表 7.3 中国地区 OpenRank 影响力排名
+
+ +|排名 | 地区 | OpenRank | +|:---:|:---:|:---------------:| +|1|北京|506624.08| +|2|上海|435804.42| +|3|广东|306014.24| +|4|浙江|274284.92| +|5|台湾|216991.49| +|6|四川|96881.79| +|7|江苏|83321.13| +|8|香港|83238.46| +|9|湖北|51370.74| +|10|福建|33482.25| + + +从排名上可以看到 OpenRank 地区排名和活跃开发者数量地区排名高度一致: +- 地区差异性较大:北京、上海开发者整体影响力归为第一档,广东、浙江、台湾开发者整体影响力在第二档,这两档和排在后面的地区有较大差异; +- 四川的整体活跃人数虽然较江苏少,但整体影响力更大,同样的现象也发生在福建和陕西身上。 + + +### 7.2 开发者工作时间分析 + +本小节对 GitHub 及 Gitee 开发者工作时间进行分析。本节时间默认使用 UTC 区时,相比东八区滞后 8 小时。数据默认使用 min-max 方法放缩到 [1-10] 区间,时区图中圆点面积越大代表数值越高。 + +#### 7.2.1 全域开发者工作时间分布 + +**GitHub 全域开发者工作时间分布** + +统计 GitHub 全域开发者工作时间,可以看出 GitHub 开发者工作时长分布在 6 时到 21 时更普遍,12 点更为集中,可能是受到定时任务的影响。周六周日相对更不活跃。 + +![7-5.png](/image/data/chapter_7/7-5.png) + +
图 7.5 GitHub 全域开发者 2023 年工作时间打点图
+
+ +**Gitee 全域开发者工作时间分布** + +![7-6.png](/image/data/chapter_7/7-6.png) + +
图 7.6 Gitee 全域开发者 2023 年工作时间打点图
+
+ +可以明显看出,Gitee 数据更符合东八区作息规律。 + +**除去机器人的全域开发者时间分布** + +![7-7.png](/image/data/chapter_7/7-7.png) + +
图 7.7 除去机器人的全域开发者 2023 年工作时间打点图
+
+ +去掉机器人数据后发现开发者时间分布集中在 6 时 - 21 时这个区间更为普遍,分布更加均匀。 + +#### 7.2.2 项目工作时间分布 + +以下为 2023 年度中国仓库 OpenRank 前四名与全球 GitHub 仓库 OpenRank 前四名的工作时间分布对比。 + +**全球 GitHub 仓库 OpenRank 前四名工作时间分布** + +1. NixOS/nixpkgs + +![7-8.png](/image/data/chapter_7/7-8.png) + +
图 7.8 NixOS/nixpkgs 2023 年工作时间打点图
+
+ +2. home-assistant/core + +![7-9.png](/image/data/chapter_7/7-9.png) + +
图 7.9 home-assistant/core 2023 年工作时间打点图
+
+ +3. microsoft/vscode + +![7-10.png](/image/data/chapter_7/7-10.png) + +
图 7.10 microsoft/vscode 2023 年工作时间打点图
+
+ +4. MicrosoftDocs/azure-docs + +![7-11.png](/image/data/chapter_7/7-11.png) + +
图 7.11 MicrosoftDocs/azure-docs 2023 年工作时间打点图
+
+ +**中国仓库 OpenRank 前四名工作时间分布** + +1. openHarmony + +![7-12.png](/image/data/chapter_7/7-12.png) + +
图 7.12 openHarmony 2023 年工作时间打点图
+
+ +2. openEuler + +![7-13.png](/image/data/chapter_7/7-13.png) + +
图 7.13 openEuler 2023 年工作时间打点图
+
+ +3. PaddlePaddle + +![7-14.png](/image/data/chapter_7/7-14.png) + +
图 7.14 PaddlePaddle 2023 年工作时间打点图
+
+ +4. MindSpore + +![7-15.png](/image/data/chapter_7/7-15.png) + +
图 7.15 MindSpore 2023 年工作时间打点图
+
+ +### 7.3 开发者角色分析 + +本部分基于 GitHub 用户在开源仓库中所触发的事件将 GitHub 用户分为**探索者**、**参与者**、**贡献者**、**提交者**(Committer)四个角色,四种角色定义如下表所示。 + +
表 7.5 四种开发者角色 +
+
+ +| 角色 | 定义 | 含义 | +|-----|-------------------------------|----------------------| +| 探索者 | 对某一项目进行 star 的用户 | 表示用户对该项目有一定的兴趣 | +| 参与者 | 对某项目有过 Issue 或者 Comment 行为的用户 | 表示用户参与该项目 | +| 贡献者(Contributer) | 对某项目中有 Pull Request (PR) 的用户 | 表示用户对项目 CodeBase 有贡献 | +| 提交者(Committer) | 参与 PR-review 或 merge 的用户 | 表示用户对项目有深度贡献 | + +一般情况下,四种关系层层递进,结构如下图所示。基于所定义的角色体系,我们从角色数量、时间变化、开发者角色演化三个视角对 GitHub 全域项目中 OpenRank 排名前十的项目进行量化,即第二部分中的项目排名榜单。 + +![7-16.png](/image/data/chapter_7/7-16.png) + +
图 7.16 开发者类型关系
+
+ +#### 7.3.1 各角色数量分布 + +
表 7.6 OpenRank 排名前 10 项目各开发者角色数量分布 +
+
+ +| 仓库名 | 探索者 | 参与者 | 贡献者 | 提交者 | +|------------------------------------------|-------|-------|------|------| +| NixOS/nixpkgs | 6244 | 3381 | 3074 | 2638 | +| home-assistant/core | 17777 | 9116 | 1230 | 905 | +| microsoft/vscode | 20113 | 16027 | 525 | 339 | +| MicrosoftDocs/azure-docs | 8939 | 2282 | 1591 | 610 | +| pytorch/pytorch | 13237 | 6391 | 1230 | 685 | +| godotengine/godot | 23426 | 7203 | 1020 | 569 | +| flutter/flutter | 14056 | 11101 | 637 | 334 | +| odoo/odoo | 5078 | 1841 | 930 | 570 | +| digitalinnovationone/dio-lab-open-source | 3619 | 907 | 504 | 40 | +| microsoft/winget-pkgs | 1852 | 1395 | 1384 | 286 | + +
+ +![7-17.png](/image/data/chapter_7/7-17.png) + +
图 7.17 开发者角色分布图
+
+ +结果表明: +- 探索者的数量普遍较高,这表明这些项目受到了广泛的关注和支持。godotengine/godot、microsoft/vscode、home-assistant/core 探索者数量远超其他项目,说明了这三个项目受到广泛关注; +- microsoft/vscode 是参与者与贡献者人数差距最大的项目,而 microsoft/winget-pkgs 二者差距最小; +- 提交者(Committer)角色方面,NixOS/nixpkgs 项目提交者人数最多,有 2,638 人;digitalinnovationone/dio-lab-open-source 项目提交者数最少。 + +#### 7.3.2 2023 年各角色新增情况 + +角色新增的统计口径为:某用户在 2023 年之前不是 X 角色(例如贡献者或提交者角色),在 2023 年成为 此角色,则为 X 角色有效新增数。 + +例如:A 2021 年给 B 项目提交了 PR,但是从未参与 Code Review 工作。2023 年 A review 了 B 项目中的 PR,则称 A 为新增提交者。 + +详细角色新增情况如下图和下表所示。 + +![7-18.png](/image/data/chapter_7/7-18.png) + +
图 7.18 2023 年开源社区角色新增图
+
+ +
表 7.7 OpenRank 排名前 10 项目新增开发者角色数量分布 +
+
+ +| 仓库名 | 新增提交者 | 新增贡献者 | 新增参与者 | 新增探索者 | +|------------------------------------------|-------|-------|-------|-------| +| NixOS/nixpkgs | 1226 | 1622 | 1591 | 3027 | +| home-assistant/core | 538 | 808 | 4640 | 8998 | +| microsoft/vscode | 263 | 394 | 10216 | 15746 | +| MicrosoftDocs/azure-docs | 352 | 1420 | 3913 | 1579 | +| pytorch/pytorch | 391 | 802 | 2083 | 13016 | +| godotengine/godot | 386 | 708 | 2834 | 22996 | +| flutter/flutter | 184 | 455 | 3954 | 13579 | +| odoo/odoo | 244 | 453 | 472 | 4991 | +| digitalinnovationone/dio-lab-open-source | 40 | 3611 | 732 | 504 | +| microsoft/winget-pkgs | 231 | 957 | 485 | 1373 | + + +结果表明: + +- 最高新增 Stars 的仓库是 godotengine/godot,达到了 22,996,其中有一半的新增来自于 2023.9,是由于当月 Unity 公司发布的新收费策略导致大量游戏开发者寻找开源游戏开发引擎替代品带来的; digitalinnovationone/dio-lab-open-source 和 microsoft/winget-pkgs 的新增探索者最少,分别为 504 和 1,373; +- 最高新增参与者的仓库是 microsoft/vscode,达到了 10,216;digitalinnovationone/dio-lab-open-source 的新增 Issues 最少,为 732; +- 最高新增开发者的仓库是 NixOS/nixpkgs,达到了 1,622; +- 最高新增维护者的仓库同样是 NixOS/nixpkgs,达到了 1,226。 + +#### 7.3.3 开发者演化视角 + +开发者演化过程定义为:一个开源社区中有多少角色转向其他角色。本报告中我们只度量某一角色转向更深层次角色的开发者。例如某用户 2023 年以前是参与者 ,到了 2023 年该用户提出了自己第一个 PR ,因此由参与者转变为贡献者。 + +![7-19.png](/image/data/chapter_7/7-25.png) + + +
图 7.19 开发者角色演化图
+
+ +
表 7.8 OpenRank 排名前 10 项目角色转化数量分布 +
+
+ +| 仓库名 | 贡献者 -> 提交者 | 参与者 -> 贡献者 | 探索者 -> 参与者 | +|:----------------------------------------:|:----------------:|:----------------:|:----------------:| +| NixOS/nixpkgs | 254 | 122 | 168 | +| home-assistant/core | 70 | 113 | 134 | +| microsoft/vscode | 16 | 70 | 287 | +| MicrosoftDocs/azure-docs | 129 | 169 | 21 | +| pytorch/pytorch | 60 | 53 | 187 | +| godotengine/godot | 63 | 131 | 330 | +| flutter/flutter | 31 | 91 | 419 | +| odoo/odoo | 55 | 19 | 32 | +| digitalinnovationone/dio-lab-open-source | 0 | 0 | 0 | +| microsoft/winget-pkgs | 49 | 11 | 18 | + +结果表明: + +- 在各社区中,我们可以观察到典型的漏斗模型,即从探索者到参与者,再到贡献者和提交者的演化路径。以 godotengine/godot 为例,有 330 名探索者演化为参与者,131 名参与者演化为贡献者,而 63 名贡献者成功演化为提交者。这一趋势在其他社区也得到了体现,符合社区成员从初次探索到深度参与的一般发展过程。 +- 在一些社区中,例如 NixOS/nixpkgs,我们观察到贡献者向提交者的演化数量相对较大。在该社区中,254 名贡献者成功演化为提交者,这可能代表相对高的代码审查需求,鼓励更多的贡献者深度参与到维护工作中。这种机制可能有助于提高社区代码的质量和稳定性。 +- 在一些社区,例如 flutter/flutter 和 godotengine/godot,我们观察到相对较多的探索者成功转化为参与者。在 flutter/flutter 中,419 名探索者演化为参与者,而在 godotengine/godot 中,有 330 名探索者变为参与者。 +- digitalinnovationone/dio-lab-open-source 项目由于在 2023 年创建,并没有数据。 + +### 7.4 机器人账号分析 + +机器人(bot)自动化是开源协作平台中的重要贡献力量,本部分分析了 2023 年全年近 6 亿条仓库事件,涉及 770 万开源仓库,以及 1,200 多个机器人账号。 + +#### 7.4.1 机器人活跃数据分析 + +
+ 7-21 + 7-20 +
+ +
图 7.20 机器人事件数趋势(左) 2023 年机器人事件数比例(右)
+
+ +分析 2015 年至 2023 年的机器人活动数据,部分观察如下: + +机器人事件数量从 4,217,635 个增长至 304,257,084 个,增长趋势显著。从 2019 年起,机器人事件数量急剧上升,尤其是在 2019 年至 2021 年间,GitHub 上机器人账号活动量呈现爆炸式增长,这一增长与 GitHub 自动化、持续集成 / 持续部署(CI/CD)工具的普及和完善有关。 + +尽管机器人账号数量不多,但每个机器人服务于多个仓库,展现出高效率和广泛影响力。 + +#### 7.4.2 机器人事件类型分析 + +![7-22.png](/image/data/chapter_7/7-22.png) + +
图 7.21 GitHub 事件计数的数量和年度增长率(%)的差异(2022 vs 2023)
+
+ +这幅图表展示了 2022 年与 2023 年间 GitHub 各事件类型的数量变化及其增长率。通过对比这两年的数据,我们可以洞察机器人账号在开发流程中的使用趋势: + +- 代码推送的主导地位:PushEvent 在机器人账号活动中占据主导地位,尤其在 2023 年数量显著上升,表明机器人账号在代码维护和更新方面扮演着重要角色; +- 项目创建活动的变化:CreateEvent 在 2022 年非常活跃,但在 2023 年有所减少,这可能表明机器人账号在新项目创建方面的活动有所下降; +- 代码审查和协作的重要性:PullRequestEvent 和 IssueCommentEvent 数量在两年中都较高,显示机器人账号在代码审查和问题讨论中的积极参与; +- 活动类型的变化:DeleteEvent 在 2023 年相比 2022 年有所减少,而 ReleaseEvent 则有所增加,这反映出机器人账号在项目生命周期管理中的不同聚焦点; +- 注释相关事件的增加:CommitCommentEvent 和 PullRequestReviewCommentEvent 在 2023 年有所增加,表明机器人账号在代码审查过程中的讨论和反馈越来越活跃; +- 机器人账号的特定用途:较少见的事件类型如 GollumEvent、MemberEvent、PublicEvent 和 WatchEvent 在数量上相对较低,表明机器人账号主要用于特定的自动化任务,而在社交互动方面的参与较少。 + + + +#### 7.4.3 机器人账号工作时间分布 + +类似开发者工作时间分布,可以对机器人的工作时间数据进行分析。 + +![7-23.png](/image/data/chapter_7/7-23.png) + +
图 7.22 机器人账号工作时间分布
+
+ +- 机器人账号的工作时间分布主要集中在凌晨 0 点至 1 点和中午 12 点至 13 点; +- 根据全域开发者时区可以推测大多数自动化流程在凌晨和中午时段较为活跃; +- 机器人工作活跃时间与工作日和非工作日相关度较小,大多数自动化协作任务为定时任务,较少的自动化任务与对贡献者的活动事件产生响应有关。 + +#### 7.4.4 GitHub 协作机器人事件数量排行榜 + +![7-24.png](/image/data/chapter_7/7-24.png) + +
图 7.23 2023 年 GitHub 协作机器人事件数量排行榜
+
+ +## 八. 案例分析 + +### 8.1 openEuler 社区案例分析 + +在 2023 年,Gitee 数据首次融入 OpenDigger 社区,使得 Gitee 上的项目也参与到了 OpenRank 的计算中。在这一年,openEuler 社区以 16,728 的 OpenRank 值超越 PaddlePaddle,成为仅次于 openHarmony 的中国第二大开源社区。 + +在 2023 年,openEuler 社区吸引了 3,941 名开发者参与 Issue 或 PR 的协作讨论,其中有 1,934 名贡献者成功为 openEuler 社区的仓库贡献并合入了至少一个 PR。 + +值得一提的是,openEuler 社区在 2023 年初发起了文档捉虫活动,并在社区文档官网中嵌入了与 Gitee 打通的交互式页面贡献机制。这一机制使得开发者在阅读文档时发现错误时,能够在官网上直接修改并一键发起 Gitee 轻量级 PR,而无需跳转到 Gitee 平台或进行 Git 操作。 + +这一创新的机制带来的数据变化令人瞩目,openeuler/docs 仓库在 2023 年合入了 7,764 个 PR,其中 74% 的 PR 是通过官网页面直接提交的。这一机制的上线也显著地提升了每月平均活跃贡献者数量(从 30 人提升到 80 人)和每月平均合入 PR 数量(从 116 个提升到 722 个)。 + +另一个值得关注的项目是 openeuler/mugen,作为 openEuler 社区的测试框架项目,该项目活跃度极高。在 2023 年,有 138 位开发者参与到项目讨论和贡献中,其中 95 位开发者成功合入了 PR。该项目在整个 openEuler 社区中的 OpenRank 排名第三,仅次于 openeuler/docs 文档仓库和 openeuler/kernel 内核仓库。这个优秀的测试框架为开发者提供了快速编写和运行测试用例的便利,可以帮助他们验证贡献代码的正确性和有效性,极大降低了后续贡献的流程成本。 + +综上所述,openEuler 社区之所以取得较高的 OpenRank 值,并非偶然。他们不仅为高频低门槛的文档贡献设计了几乎零流程成本的交互式贡献机制,还提供了优秀的测试框架,助力贡献者在贡献代码的同时快速验证其正确性。这些优秀的开发者体验优化,是各个开源社区值得学习和推广的范例。 + +### 8.2 中国开发者贡献的顶级仓库列表 + +我们使用目前已采集的近千万 GitHub 开发者账号的详细信息,其中包含了中国开发者账号近 20 万,通过分析这 20 万开发者在 2023 年 OpenRank 贡献度情况,获得中国开发者贡献仓库总榜如下: + +![8-1.png](/image/data/chapter_8/8-1.png) + +
图 8.1 GitHub 中国开发者贡献仓库排行榜(Top 30)
+
+ +大部分项目是在 OpenRank 大榜中有体现的,比较有趣的项目包括: + +- [NixOS/nixpkgs](https://github.com/NixOS/nixpkgs):这也是一个国际顶级项目,一个新操作系统的包管理工具,虽然大部分更新都是包信息的更新,但也意味着这个操作系统生态本身的繁荣。 + +- [intel-analytics/BigDL](https://github.com/intel-analytics/BigDL):将 LLM 运行在 Intel XPU 上的运行库,这个仓库 17 年就建立了,到 2021 年底时近乎已经废弃,但随着 2022 年 LLM 崛起,这个仓库起死回生,目前保持每月 50 人左右的活跃规模。 + +
+ 8-2 +
+ +
图 8.2 BigDL OpenRank 变化趋势图 +
+
+ +>以上截图来自 [HyperCRX](https://github.com/hypertrons/hypertrons-crx) + +- [siyuan-note/siyuan](https://github.com/siyuan-note/siyuan):思源笔记,一款隐私优先的国产开源知识管理工具,支持双向知识块级引用,保持每月百人的社区活跃规模。支持订阅商业化,价格非常亲民。 + +- [baidu/amis](https://github.com/baidu/amis):百度开源的低代码页面生成框架,低代码项目近年来非常火爆,例如阿里开源的 LowcodeEngine,鸿蒙生态的 DevEco Studio 等为开发者低代码快速开发应用提供了巨大便利。 + +- [cocos/cocos-engine](https://github.com/cocos/cocos-engine):国产的游戏引擎佼佼者,随着元宇宙概念的兴起,godot 等游戏引擎成为全球重要的顶级开源项目,而国产的游戏引擎 cocos/cocos-engine 也在国内有着优秀的表现。 + +- [MaaAssistantArknights/MaaAssistantArknights](https://github.com/MaaAssistantArknights/MaaAssistantArknights) 这个项目很有意思,是明日方舟这款游戏的一个自动化刷日常任务的脚本助手项目,可以在手机模拟器上进行游戏日常任务的自动化执行。这个仓库超过 10k star,每月稳定有 300 多人活跃,非常惊人的活跃度。而且项目完全开源免费,社区维护,且支持所有桌面平台。 + +![8-3.png](/image/data/chapter_8/8-3.png) + +
图 8.3 MaaAssistantArknights 项目截图 +
+
+ \ No newline at end of file diff --git a/en/commercialization.md b/en/commercialization.md new file mode 100644 index 0000000..1a4ad0a --- /dev/null +++ b/en/commercialization.md @@ -0,0 +1,1511 @@ +--- +outline: deep +--- +# OSS Commercialization + +## 1. Overview +In the Commercialization chapter of the Open-source Annual Report for the past two years, the underlying drivers of successful commercialization of open-source software, possible commercialization paths for open-source software companies, decision-making criteria of investors in open-source projects, and case studies are presented. Last year, combined with some trends and changes in the market environment at that time, we discussed the drivers, challenges and realization paths for domestic open-source projects to explore the process of globalization and commercial development, which triggered a lively discussion among many open-source buddies. + +In 2022-2023, the field of AI has seen an explosion of pre-trained large language model (LLM) technology, which has sparked widespread interest across society and is predicted to continue to deepen its impact on life and work in the future. It is not difficult to find that in this wave of AI technology iteration, the open-source ecosystem has also played an essential role in promoting the development of technology, and there are many open-source models as well as open-source projects actively seeking commercialization. However, there are numerous differences between open-source models and traditional open-source software. In such an era, the commercial development of AI open-source projects and open-source models has become a topic worthy of in-depth research and discussion. + +The security and controllability of open-source projects, including open-source software and open-source models, is one of the key considerations for business users in the commercialization process. 
Combined with the current technology trends, the analysis of the security of open-source software, the controllability of open-source models, and open-source commercial licenses are topics of interest. + +Capital is an important participant in promoting the development of open-source markets. For investment institutions, when judging an open-source project, they will often consider the following points: in the product development stage, the focus should be on whether the team has the ownership and control of the code, and whether it has international competitiveness; in the community operation stage, the main point is to see whether the operating ability is strong enough; in the commercialization stage, the market matching ability and the maturity of the business model will become the main focus. + +As the first organization in the field to focus on open-source and continue to work on it, Yunqi Partners has successfully identified and invested in open-source companies such as PingCAP, Zilliz, Jina AI, RisingWave Lab, TabbyML, etc., and continues to participate in the construction of the open-source ecosystem. + +In order to further enrich the content of the report, this year we are honored to jointly organize a series of closed-door discussion Meetups with Open-source Society. We had a deep discussion on the development of open-source commercialization related to the development of AI Infrastructure, the development of open-source LLMs, together with industry guests including Microsoft, Google, Apple, Meta, Huawei, Baidu and other domestic and international manufacturers, Stanford University, Shanghai Jiao Tong University, China University of Science and Technology, UCSD and other universities and research institutes, as well as a large number of domestic and international front-line entrepreneurs in the open-source and open-source LLM space. Some of the key insights are included in this report. 
+ +This chapter is written by the investment team of Yunqi Partners. The topics discussed this year focus on cutting-edge trends and technology, together with some outlook and predictions. We combined industry participants' experience and opinions to put forward our views; if anything is ill-considered or you hold a different view, further discussion is highly welcome. + +Key elements include: + +**Open source ecosystem for rapid AI growth** + +**Open source security challenges** + +**Capital market situation for open source projects** + + +## 2. Open source ecosystem fuels rapid AI development + +### 2.1 The proliferation of pre-trained LLM is strongly driven by open source + +#### 2.1.1 Rapid development of pre-trained LLMs + +The development of pre-trained LLMs has been groundbreaking over the past few years, and they have become a major landmark in the field of AI. These models are not only growing in scale but have also made huge leaps in their intelligent processing capabilities. From the complexity of language processing to the finesse of image processing and the depth of advanced data analysis, these models demonstrate unprecedented capability and precision. Especially in the field of Natural Language Processing (NLP), pre-trained LLMs, such as the GPT series, have been able to simulate complex human languages by learning a large amount of textual data for high-quality text generation, translation and comprehension. These models not only show significant improvements in expression fluency, but also show an increasing ability to understand context and capture subtle linguistic differences. + +In addition, these LLMs perform extremely well in complex data analysis. They are capable of extracting meaningful patterns and correlations from huge data sets to support a wide range of fields such as scientific research, financial analysis, and market forecasting. It is worth noting that the development of these models is not limited to their own enhancements. 
As they are popularized and applied, they are driving technological advances across the industry and society as a whole, facilitating the creation of new applications such as intelligent assistants, automated writing tools, advanced diagnostic systems, etc. Their development opens up more new development directions for incoming AI applications and research, indicating a new round of technological innovation. + +Enthusiasm for AI among public users is surging rapidly. Number of ChatGPT users reached 100 million in just 2 months, compared to TikTok's 9-month-record. This is not only a huge commercial success, but also a major milestone in the history of AI technology development. + +
+ +| ![image001](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-1.png) | +| -------------------------------------------------------- | + +
+ +
Figure 2.1 Time to reach 100 million users for major apps (in months)
+
+ +Along with the growing AI popularity, the global AI market size is also growing rapidly. According to Deloitte, it grew at a CAGR of 23% during 2017-2022, and is expected to reach seven trillion dollars in 2025. + +
+ +| ![image002](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-2.png) | +| -------------------------------------------------------- | + +
+ +
Figure 2.2 Global AI Market Size (Trillions of Dollars)
+ +#### 2.1.2 Open Source Power for AI + +The power of the open-source ecosystem has played an essential role in making such great strides in pre-trained models. This includes not only research from academia but also support from industry. Under the joint efforts of the open-source ecosystem, the performance of the open-source-based LLM is rapidly developing and gradually rivaling that of closed-source. + +**The power of open source from academia has contributed significantly to the evolution of AI technology** + +Since Princeton University published ImageNET in 2009, a significant paper in computer vision, there has been a gradual increase in the number of papers related to AI machine learning. Over the years, researchers have proposed many open-source algorithms. By 2017, the number of papers on AI machine learning on Arxiv had reached over 25,000. The "Attention Is All You Need" paper was published that same year, introducing the open-source Transformer model. The publication of this paper led to a concentrated surge in research and papers on LLM. As a result, from 2017 to 2023, the number of Arxiv papers related to LLM surged to over 100,000. This surge also considerably accelerated the open-source development of related models and laid the theoretical foundation for the subsequent explosion of LLM technology. + +
+ +| ![image003](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-3.png) | +| -------------------------------------------------------- | + +
+ +
Figure 2.3 Cumulative number of AI / Machine Learning related papers published on Arxiv
+
+ +:::info Expert Review +**Willem Ning JIANG**:This insight is quite exciting, and academic open-source plays a very important role. +::: + +**The industry's open source power fuels rapid development of LLM** + +With the ChatGPT LLM popularity, more and more technicians are devoted to the research and development of LLMs. In addition to closed-source products, many great open-source LLMs are also leading the industry. Stable Diffusion in 2022, with its powerful graphical capabilities and community strength, quickly caught up with Midjourney, a famous closed-source graphical model, and has already taken the lead in some aspects; the robust capabilities of open-source large language models, represented by Meta LLaMA 2, have made Google researchers reflect that "we don't have a moat, and neither does OpenAI"; and there are also emerging open-source leaders in various fields, such as Dolly, Falcon, etc. With its powerful community resources and cheaper cost of use, Open-source LLM quickly gained many business and individual users, acting as an indispensable force in the development of LLM. + +
+ +| ![image004](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-4.png) | +| -------------------------------------------------------- | + +
+ +
Figure 2.4 Emerging Open Source LLMs
+
+ +**Performance of open-source LLMs is rapidly catching up with closed-source** + +Closed-source LLM represented by OpenAI ChatGPT4 started earlier, and the number of parameters and various performance metrics showed a tendency to outperform open-source models in the early stage. However, open-source models have a strong community and technical support, resulting in rapid performance growth. The most mature version of ChatGPT4 scored 1,181, while Llama 2, an open-source LLM launched less than four months ago, scored 1,051, with a difference of only 11%. It's worth noting that the rankings 4-9 are all open-source LLMs, indicating that the growth in open-source LLM performance is not an isolated case but an industry trend. Open-source LLMs are highly cost-effective due to their low usage costs and smaller performance gap compared to closed-source LLMs, which makes them attractive to increasing numbers of business and individual users. Please see the more detailed discussion of costs later. + +Benefiting from the open nature of open-source models, users can easily fine-tune LLMs to fit different vertical application scenarios. Fine-tuned LLMs are more industry-specific than general-purpose LLMs, which is an advantage that closed-source models cannot provide. + +
+ +| ![image005](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-5.png) | +| -------------------------------------------------------- | + +
+ +
+ +Figure 2.5 [ELO ratings](https://en.wikipedia.org/wiki/Elo_rating_system) of LLMs based on user feedback +
+ +#### 2.1.3 The three layers of the LLM + +The technical architecture of the LLM is divided into three main layers, as shown in the figure below. Open source has made significant contributions to the model layer, the developer tools layer, and the application layer. Each layer has its unique function and importance, and together, they form the complete architecture of the large-scale model technology. The subsequent sections (2.2, 2.3, 2.4) will discuss each layer in detail. + +
+ +| ![image006](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-6.png) | +| -------------------------------------------------------- | + +
+ +
Figure 2.6 Technical Layers of the LLM
+
+ +- **Model layer** + +The model layer is the foundation of the entire architecture, including the core algorithms and computational frameworks that make up the LLM, typical models such as GPT and Diffusion are the core of generative AI. This layer involves model training, including pre-processing of large amounts of data, feature extraction, model optimization and parameter tuning. The key to the model layer is efficient algorithm design and large-scale data processing capabilities. + +- **Development tools layer** + +The development tools layer provides the necessary tools and platforms to support the development and deployment of LLM, including various machine learning frameworks (e.g., TensorFlow, PyTorch) and APIs that simplify the process of building, training, and testing models. The development tools layer may also include cloud services and computing resources that support model training and deployment. In addition, this layer is responsible for version control, testing, maintenance, and updating of the model. + +- **Application layer** + +The application layer mainly considers how to access the LLM capabilities in real applications. This layer integrates models into specific business scenarios, such as intelligent assistants, automated customer service, personalized recommendation systems, etc. The key to the application layer is translating complex models into user-friendly, efficient, and valuable applications while ensuring good performance and scalability. + +These three layers are interdependent and constitute the complete architecture of the LLM technology; from the basic construction of the model to the realization of specific applications, each layer plays an important role. The corresponding open-source content for each of the three layers is discussed in detail next. 
+ +### 2.2 Open source is the second driving force fuelling the development of foundation models + +#### 2.2.1 Supply side:Concentrate on R&D + +**Saving the number of developers and centralizing R&D capabilities** + +The development of AI models requires technical expertise, and there is a shortage of related talent in China. Open-source technology can promote the development of advanced AI functionality and alleviate pressure on SMEs. Open-source Language Models lower the entry barrier and save development time, enabling more researchers to access advanced AI technologies directly. + +Based on efficient pre-trained models, developers can directly innovate and improve in a targeted way rather than being distracted from building the infrastructure. This concentration on innovation rather than infrastructure has greatly contributed to rapid technological advances and the expansion of applications. At the same time, sharing open-source models facilitates the dissemination of knowledge and technology, providing a platform for developers worldwide to learn and collaborate, which plays a crucial role in driving overall progress across the industry. + +**Saving computational power and avoiding reinventing the wheels** + +As the performance of the LLM continues to grow, so does its number of parameters, which has jumped 1,000 times in the past five years. According to estimates, ChatGPT chip demand reaches more than 30,000 NVIDIA A100 GPUs, corresponding to an initial investment of about 800 million U.S. dollars, with daily electricity costs of $50,000. The computational requirements for training are becoming more and more costly, so reinventing wheels over and over again is a massive waste of resources. Coupled with the U.S. ban on NVIDIA's A100/H100 supply to mainland China, it's becoming increasingly difficult for domestic companies to train on LLMs. 
The open-source pre-trained LLM has become a perfect choice, which can solve the current dilemma so that more companies can leverage LLMs for secondary development. + +Four steps are required for LLM training: pre-training, supervised fine-tuning, reward modeling, and reinforcement learning. The computational time for pre-training occupies more than 99% of the entire training cycle. Thus, open-source models let developers of LLM platforms skip the step that accounts for 99% of the cost and invest their limited funds and time in the fine-tuning steps, which is a significant help to most application layer developers. Many SMEs need model service providers to customize models for them. The open-source ecosystem can save a lot of costs for the secondary development of LLMs and thus can give birth to many startups. + +
+ +| ![image007](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-7.png) | +| -------------------------------------------------------- | + +
+ +
Figure 2.7 Increasing number of large model parameters
+
+ +**Open source allows for exploration of a wider range of technological possibilities** + +Whether the world-shattering Transformer model is the optimal solution remains an open question, as does whether an architecture such as the RNN (Recurrent Neural Network) could turn out to be the next best thing. However, thanks to the open-source ecosystem, developers can explore different branches of the AI family together with various emerging development forces, ensuring the diversity of technological development. As a result, the exploration of LLMs will not be confined to a local optimum, and AI technology can keep developing in all directions. + +#### 2.2.2 Demand side: lowering the barriers to capture the market + +**Open source models significantly reduce costs for model users** + +Deploying an open-source model initially requires some investment, but as usage increases, it exhibits economies of scale, and the cost of usage is more controllable compared to closed-source. If your average daily request volume stays below a certain threshold, then directly invoking the API is less expensive. However, if you have a higher request frequency, deploying the open-source model is less costly, so you should choose the appropriate method based on your actual usage.
+ +
+ +| ![image008](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-8.png) | +| -------------------------------------------------------- | + +
+ +
Figure 2.8 Cost Comparison of Calling OpenAI APIs and Deploying Open Source Models on the AWS Cloud
+
+ +Comparison of directly calling OpenAI's API and deploying Flan UL2 model on public cloud as an example: + +According to the latest data from OpenAI's official website, using ChatGPT4 model, the input is $0.03 / 1000 tokens and the output is $0.06 / 1000 tokens. Considering the relationship between input and output and assuming an average cost of $0.04 / 1000 tokens, each token is about 3/4 of an English word, and the number of tokens in a request is equal to the prompt word + the output tokens. Assuming a block of text is 500 words, or about 670 tokens, the cost of a block of text is 670 x 0.004/1000 = $0.00268. + +Suppose the open-source model is deployed on the AWS cloud, taking the Flan UL2 model with 20 billion parameters, as mentioned in the related tutorial published by AWS, as an example. In that case, the cost consists of three parts: + +- Fixed cost of deploying models as endpoints using AWS SageMaker is about $5-6 per hour or $150 per day +- Connecting SageMaker endpoints to AWS Lambda: Assume responses are returned to users in 5s, using 128MB of memory. The price per request is: 5000 x 0.0000000021 (unit price per millisecond for 128MB) = $0.00001 +- Open this Lambda function as an API via API Gateway. The Gateway costs about $1 / 1 million requests or $0.000001 per request. + +Based on the above data, it can be calculated that the total cost of the two is equal when the number of requests is 56,200 in a day. When the number of requests reaches 100,000 per day, the cost of using ChatGPT4 is about $268, while the cost of the open-source big model is $151; when the number of requests reaches 1,000,000 per day, the cost of using ChatGPT4 is about $2,680, while the cost of the open-source big model is $161. It can be found that the cost savings of the open-source big model are significant as the request volume increases. 
+ +**Open source improves the explainability and transparency of models and lowers the barrier to technology adoption** + +Open-source models are more accessible for evaluation than closed models. Open-source models provide access to their pre-training results, and some even disclose their training datasets, model architectures, and more, making it easier for researchers and users to conduct in-depth analyses of the LLMs and comprehend their strengths and weaknesses. Scientists and developers worldwide can review, evaluate, explore, and understand each other's underlying principles, improving security, reliability, explainability, and trust. Sharing knowledge widely is crucial to promote technological progress and also helps to reduce the possibility of technology misuse. Closed-source models can only be evaluated through performance tests, essentially a "black box." It is hard to measure the strengths and weaknesses, applicability scenarios, and other factors of the closed-source models, and their explainability and transparency are considerably lower than those of open-source models. + +Closed-source models can face the risk of being questioned for their originality. Users cannot be sure that closed-source models are genuinely original, leading to concerns about copyright and sustainability issues. On the other hand, open-source models are more convincing to users because the code is available to validate their originality. According to Hugging Face technician comments, open-source models like Llama2, which have published details of training data, methods, labeling, and so on, are more transparent than the black box of closed-source LLMs. With transparency in the articles and the code, users know what's in there when they use it. + +Higher explainability and transparency are conducive to enhancing users' trust, especially business users, in the LLMs. 
+ +**Business users can realize specific needs with open source base models** + +Business users have multiple types of specific needs, such as industry-specific fine-tuning, local deployment to ensure privacy, and so on. + +As the number of LLM parameters continues to increase, the training cost continues to rise. There are better solutions than simply growing the LLM parameters to improve performance. On the contrary, fine-tuning for a specific problem can quickly improve the LLM's performance on that problem, achieving better results with less effort. For example, WizardMath, an open-source LLM of mathematics fine-tuned by Microsoft based on LLaMA2, has only 70 billion parameters, but after testing on the GSM8k dataset, the mathematical ability of WizardMath directly beats that of several LLMs such as ChatGPT, Claude Instant 1, PaLM 2-540B, and so on, which fully shows the critical role of fine-tuning in improving the professional problem-solving ability of LLMs, which is also a significant advantage of the open-source LLM.
+ +
+ +| ![image009](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-9.png) | +| -------------------------------------------------------- | + +
+ +
Figure 2.9 Performance Ranking of WizardMath
+
+ +Many business users have incredibly high data privacy requirements, and the ability to deploy open-source LLMs locally greatly protects business privacy. When clients call closed-source LLMs, the closed-source models always run on the servers of companies such as OpenAI. Clients can only send their data remotely to the servers of the LLM providers, which is very unfavorable to privacy protection. Enterprises in China also face related compliance issues. While the open-source LLM can be locally deployed, all the data is processed within the company owning the data, even allowing offline processing, significantly protecting the clients' data security. + +**Open source model facilitates long-lasting customer experience** + +Creating a reliable dataset for enterprises is crucial to keeping up with the constant changes in open-source models. Open-source models can be customized to fit an enterprise's specific needs, but this requires a high-quality dataset. By investing in a dataset, enterprises can fine-tune multiple models and avoid constantly replacing them with newer versions, which saves money in the long run, as the dataset does not need to be updated continuously. Enterprises can leverage the model's capabilities without incurring significant costs. + +Open-source models are updated quickly to meet the changing needs of users. The power of R&D in the open-source community quickly fills the shortcomings of open-source LLMs. LLaMA2 itself lacks a Chinese corpus, leading to unsatisfactory Chinese comprehension; however, only the day after LLaMA2 was made available, the first open-source Chinese LLaMA2 model, "Chinese LLaMA2-7B", appeared in the community and could be downloaded and run. Adequate community power support can meet the different needs of users. In contrast, closed-source companies usually need help to take care of the distinct needs of various types of users comprehensively. 
+ +**Open source helps to capture market opportunities** + +Open-source models are more accessible to users and can expand the market quickly due to their low barrier to entry. Stable Diffusion, an open-source image generation model, has become an essential competitor to MidJourney, a closed-source model, because of its large developer community and diverse application scenarios. Although not as good as MidJourney in some ways, Stable Diffusion has captured a significant share of the image generation market with its open-source and free features, making it one of the most popular image generation models. Its success has also brought widespread attention and investment to the companies behind it, RunwayML and Stability AI. + +#### 2.2.3 Ecological Side: Converging Diversity for Long-Term Growth + +**Open source facilitates large model companies to quickly seize ecological resources** + +The low threshold and easy accessibility of open-source models will also help the models quickly capture relevant ecological resources. Stable Diffusion is an open-source project that has received positive responses and support from many freelance developers worldwide. Many enthusiastic programmers were actively involved in building the easy-to-use graphical user interface (GUI). Many LoRA modules have been developed to provide Stable Diffusion with features such as accelerated generating, more vivid images, etc. According to the official website of Stable Diffusion, one month after the release of Stable Diffusion 2.0, four of the top ten apps in the Apple App Store are AI painting apps based on Stable Diffusion. A thriving ecosystem has become the solid foundation of Stable Diffusion. + +At the time of the original release of the open-source LLM LLaMA2, there were 5,600 projects on GitHub containing the "LLaMA" keyword and 4,100 projects containing the "GPT4" keyword. 
After two weeks, the LLaMA-related ecosystem has grown significantly, with 6,200 related projects compared to 4,400 "GPT4"-related projects. For LLM companies, ecosystem means markets, technological power, and an inexhaustible driver of growth. With lower barriers, open source can grab ecological resources faster than closed-source models. Therefore, open-source LLM companies should seize this advantage, strengthen communication with community developers, and provide them with sufficient support to promote the rapid development of relevant ecosystems. + +**Open source facilitates large model vendors to pry open the market and gain business alliances** + +After LLaMA2 was commercially open-sourced, Meta quickly cooperated with Microsoft and Qualcomm. As the major shareholder of OpenAI, Microsoft chose to collaborate with open-source vendor Meta, which means that open-source has become a force to be reckoned with. For future collaboration, Meta stated that users of Microsoft Azure cloud service will be able to fine-tune the deployment of Llama2 directly on the cloud. Microsoft disclosed that Llama2 has been optimized for Windows and can run directly on Windows locally. + +The collaboration between the two companies highlights that open-source LLMs and cloud vendors have a natural cooperation foundation. Not coincidentally, there is a similar trend in domestic open-source LLM vendors: Baidu's ERNIE and Ali's Qwen are both open-source LLMs. Although users usually do not pay for using open-source LLMs, they need to pay for the computational power using Baidu Cloud and Ali Cloud as computational platforms. + +Meta's partnership with Qualcomm also signals its expansion into the mobile sector. Because open-source LLMs have a broad audience and can be deployed locally, among other advantages, mobile phones are becoming a vital carrier for the convenient use of LLMs in the future. This also attracts mobile phone chip manufacturers to collaborate with open-source model vendors. 
+ +In summary, the open-source LLM, with its broad reach, facilitates the company behind it to find partners and pry into the market. + +**Open source can mobilize a wide range of communities and bring together diverse development forces** + +The power of the community has always been an essential strength of open source. As shown in the figure below, the generative AI projects on GitHub have realized rapid growth in 2022, soaring from 17,000 to 60,000. The rapidly growing community can not only quickly provide a large amount of technical feedback for open-source LLM developers but also fully enhance the end reach of open-source LLMs and fine-tune the application of open-source models to various vertical domains to bring more users to the LLMs. + +
+ +| ![image010](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-10.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.10 Changes in the number of generative AI-related projects open-sourced on GitHub (Source: GitHub)
+
+ +Open-source large language models (LLMs) are built with contributions from developers worldwide from different cultures, regions, and technical backgrounds. This is in contrast to closed-source models. The graph below shows that, in addition to the United States, contributors from various countries, including China, India, Japan, Brazil, and others, have made significant contributions to the open-source community for generative AI. By including contributions from developers worldwide, the open-source LLM can be adapted to suit different regions' customs, languages, industries, and other usage habits. This will make the open-source LLM more versatile and appealing to a broader audience.
+ +
+ +| ![image011](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-11.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.11 Top 10 global communities creating the most generative AI projects on GitHub (Source: GitHub)
+
+ +**Domestic open source base LLM is booming, keeping pace with global leaders** + +Based on the domestic ecosystem of tech companies, the country's open-source pre-trained foundation LLMs are also booming, keeping pace with global leaders. + +In June, Tsinghua ChatGLM was upgraded to the second generation, which took the "top spot" in the Chinese circle (Chinese C-Eval list), and ChatGLM3 launched in October not only has a performance comparable to that of GPT-4V at the multimodal level, but also is the first LLM product with code interaction capability in China (Code Interpreter). + +In October, the Aquila LLM series was fully upgraded to Aquila2, and Aquila2-34B with 34 billion parameters was added. At that time, in 22 evaluation benchmarks in four dimensions, namely, code generation, examination, comprehension, reasoning, and language, Aquila2-34B strongly dominated the top 1 of several lists. + +On November 6, the LLM startup company Zero One Everything, led by Dr. Kai-Fu Lee, officially open-sourced and released its first pre-trained LLM, Yi-34B, which has achieved amazing results in a number of leaderboards, including Hugging Face's Open LLM Leaderboard. + +In December, Qwen-72B, a model with 72 billion parameters from AliCloud's Tongyi Qianwen, topped the Open LLM Leaderboard of Hugging Face, the world's largest modeling community, by overpowering domestic and international open-source LLM models such as Llama 2. + +Domestic open-source pre-trained base LLMs are far more numerous than the above; the booming open-source pre-trained base LLM ecology is exciting, and it includes academic institutions, Internet giants, and some excellent startups. At the end of the report, the statistics of startups and models with open-sourced LLMs are summarized. 
+ +#### 2.2.4 Paths to Commercialization of Open-source LLMs + +Currently, we are in the era of rapid development of open-source LLM technology, a field that, while promising, also faces significant business model exploration challenges. Based on exchanges with practitioners and case studies, this paragraph attempts to summarize some of the directions of commercialization exploration at this stage. + +**Provision of support services** + +With the emergence of more and more basic open-source technologies, the complexity and professionalism of the software have increased dramatically, and the user's demand for software stability has increased simultaneously, requiring professional technical support. At this point, enterprises represented by Red Hat began to commercialize their operations based on open-source software. Their main business model is the "Support Services" model: providing paid technical support and consulting services to customers who use open-source software. The overall complexity and specialization of current foundation models are high, and users need professional technical support as well. + +In the LLM space, Zhipu AI's business model is more similar to Red Hat's. It provides enterprises with local private deployment services of ChatGLM, a self-developed LLM, providing efficient data processing, model training and deployment services. Zhipu AI provides the LLM files and related toolkits so that users can train their own fine-tuned models and deploy inference services; on top of this, Zhipu AI provides technical support and consulting related to application deployment, as well as updates to the base model. With this solution, companies can achieve complete control of data and run their models securely.
+ +
+ +| ![image012](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-12.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.12 Zhipu AI's Pricing Model for Private Deployment
+
+ +**Provision of cloud hosting services** + +Cloud growth has continued to exceed expectations since the development of cloud computing technology. The growing need for flexible and scalable infrastructure is driving IT organizations' cloud spending and increasing cloud penetration worldwide. Against this technological backdrop, there is a growing demand from users to reduce software O&M costs. Cloud hosting services are SaaS that enable customers to skip on-premise deployment and host software as a service directly on a cloud platform. By subscribing to SaaS services, clients can turn high upfront capital expenditures into small recurring expenditures, and relieve O&M pressure to a large extent. Some of the more successful open-source software companies include Databricks, HashiCorp, and others. + +In the field of LLMs, Zhipu AI directly provides standard API products based on ChatGLM, so that customers can quickly build their own proprietary LLM applications, pricing according to the number of tokens of text actually processed by the model. The service is suitable for scenarios that require high level of knowledge, reasoning ability and creativity, such as advertisement copywriting, novel writing, knowledge-based writing, code generation, etc. The pricing is 0.005 yuan / thousand tokens. + +At the same time, Zhipu AI also provides API interfaces for super-simulated LLMs (supporting character-based role-playing, extended multi-round memory, and individualized character dialogues) and vector LLMs (vectorizing the input text information so as to combine with vector databases, provide external knowledge bases for LLMs, and improve the accuracy of LLM inference). + +Hugging Face also offers a cloud-hosted business model. 
The Hugging Face platform hosts a large number of open-source models and also offers a cloud-based solution, the Hugging Face Inference API, which allows users to easily deploy and run these models in the cloud via an API. This model combines the accessibility of an open-source model with the convenience of cloud hosting, allowing users to use it on demand without having to set up and manage a large infrastructure on their own.
+ +
+ +| ![image013](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-13.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.13 Hugging Face Cloud Platform Charges
+
+ +**Development of commercial applications based on a foundation model** + +In this model, the vendor's own base model is free and open source, but the vendor develops a series of commercial applications on top of the base model and charges for those applications. A typical case is Tongyi Qianwen. + +AliCloud has developed eight applications based on its open-source base model Tongyi Qianwen: Tongyi Tingwu (speech recognition), Tongyi Xiaomei (to improve customer service efficiency), Tongyi Zhiwen (text comprehension), Tongyi Stardust (personalized roles), Tongyi Spirit Codes (to assist in programming), Tongyi Faerui (legal industry), Tongyi Renxin (pharmaceutical industry), and Tongyi Diaojin (financial industry). Each of these applications has a corresponding enterprise-level payment model. Some of the apps also include an individual-level payment model, such as Tongyi Tingwu. It mainly provides voice-to-text related services such as meeting minutes, and its charges are mainly calculated based on the length of the audio.
+ +
+ +| ![image014](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-14.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.14 Tongyi Tingwu Pricing Model
+
+ +**Model-as-a-Service business model** + +The lowest level of Model as a Service (abbreviated as MaaS) means to take the model as an important production element, design products and technologies around the model life cycle, and provide a wide variety of products and technologies starting from the development of the model, including data processing, feature engineering, training and tuning of the model, and services for the model. + +AliCloud initiated the "ModelScope Community" as the advocate of MaaS. In order to realize MaaS, AliCloud has made preparations in two aspects. One is to provide a model repository, which collects models, provides high-quality data, and can also be tuned for business scenarios. Model usage and computing power need to be combined in order to provide a quick experience of the model so that a wide range of developers can quickly experience the effects of the model without having to write code. The second is to provide abstract interfaces or API interfaces so that developers can do secondary development for the model. In the face of specific application scenarios, providing fewer samples or zero samples, it is easy for developers to carry out secondary optimization of the model, which really allows the model to be applied to different scenarios.
+ +
+ +| ![image015](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-15.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.15 AliCloud: Model-as-a-Service
+
+ +**LLM business models need to be explored and experimented with** + +Currently, the business path of open-source LLM companies has not yet been validated by the market, so a large number of companies are actively exploring different business models without sticking to a single pricing strategy. However, so far, no effective business model has been found to cover their high development and operating and maintenance costs, thus making their economic sustainability questionable. This situation reflects, to some extent, the nature of this emerging industry: while technological breakthroughs have been made, the question of how to translate these technologies into economic benefits remains an open one. + +However, it is worth noting that despite such challenges, the rise and development of open-source LLMs still marks the birth of a new industry. This industry has its own unique value and potential, offering unprecedented technical support and innovation possibilities for a wide range of industries. In this process, all participants (including research institutions, enterprises, developers and users) are actively exploring and trying to find a model that can balance technological innovation and economic returns. + +This exploration is not an overnight process; it takes time, experimentation, and a deep understanding of market and technology trends. We are likely to see a variety of innovative business models emerge, such as technical support services, cloud hosting, MaaS, etc. as mentioned above. Although the current business models for these open-source LLMs are not yet mature, it is this kind of exploration and experimentation that will drive the entire LLM field forward and ultimately find a business path to sustainable growth with profitable returns. 
+ +### 2.3 Making AI developer tools open-source has become an industry consensus at this stage + +#### 2.3.1 Developer Tools Play an Important Role in the AI Chain + +The developer tools layer is an important link in the chain of AI LLM development. As shown in the figure below, the developer tools layer serves as the connecting middle layer, linking the layers above and below it: + +For taking on computational resources, the developer tools layer plays a PaaS-like role. Cloud-based platforms help LLM developers more easily deploy computing and development environments and invoke and allocate resources, allowing them to focus on the logic and functionality of model development and realize their own innovations. + +For linking pre-trained models, the developer tools layer provides a series of tools to accelerate the development of the model layer, including dataset cleaning and labeling tools.
+ +
+ +| ![image016](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-16.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.16 Location of Developer Tools in the AI LLM Chain
+
+ +To promote the development of AI applications, the developer tools layer plays an essential role in helping enterprises and individual developers to develop and deploy their final products. For enterprise developers, developer tools help to realize the deployment of LLMs in the industry, as well as the monitoring of the model to ensure the regular operation of the enterprise model. Other related functions include model evaluation, database inference, and supplementation of the model running process. For individual developers, developer tools help them simplify deployment steps and reduce development costs, inspiring the creation of more fine-tuned models for specific functions, such as Autotrain by Hugging Face, which allows developers to fine-tune open-source models based on private data with just a few mouse clicks. At the same time, the developer tools also help to establish the connection between the end-user and the LLM APP and even the deployment of the LLM on the end-user's device. + +With the increasing maturity and advancement of development tools, more and more developers are venturing into development related to LLMs. These tools not only improve development efficiency but also lower the barrier to entry, enabling more innovative-thinking talent to participate in the field. From data processing and model training to performance optimization, these tools provide comprehensive support for developers. As a result, we have witnessed the birth of a diverse and active LLM development community with some cutting-edge projects and innovative applications.
+ +
+ +| ![image017](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-17.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.17 Growing Number of AI LLM Developers
+
+ +LLM development tools are blossoming, covering everything from data preparation and model construction to performance tuning, and they continue to push the frontiers of AI technology. Some tools focus on data annotation and cleaning so that developers can more easily obtain high-quality data; some tools are committed to improving the efficiency of fine-tuning so that the LLM is more in line with the customization needs; there are also tools responsible for the operation of the LLM monitoring, to provide timely feedback to the developers, users. These diverse tools promote technological innovation and provide developers with more choices, together building a vibrant and creative ecosystem for LLM development. There is no shortage of great open-source projects that greatly benefit both users and open-source companies.
+ +
+ +| ![image018](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-18.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.18 Large number of development tools covering different levels of LLM development
+
+ +#### 2.3.2 Open source for developer tools is important + +**Supply-side benefits** + +Open-source developer tools are conducive to polishing and upgrading the product in different scenarios, which contributes to its rapid maturity. One of the main advantages of open-source developer tools is that they provide an extensive testing and application environment. Because open-source tools are freely available for use and modification by a variety of users and organizations, they are often applied and tested in diverse real-world scenarios and are thus "battle-tested." This extensive use and feedback helps the product identify and fix potential defects more quickly while facilitating the development of new features and improvements to existing ones. Especially for startups, this is the fastest and most cost-effective way to get product feedback, promote product improvement, and help quickly bring more mature commercialized products to market. + +Open-source developer tools underlying products with high user stickiness are conducive to rapidly spreading the market. As mentioned earlier, developer tools contain many indispensable components of the LLM development process. Once developers become accustomed to specific tools, they tend to use them consistently because changing tools means relearning and adapting to the new tool's features and usage. Therefore, these products naturally have high user stickiness.
+ +![FW-_aFHXEAMjI09](https://hackmd.io/_uploads/By-9g0d5T.jpg) + +
Figure 2.19 High user stickiness of open source development tools
+
+ +The chart shows the net revenue retention rate for major SaaS products, which reflects the retention rate of regular customers, their ability to keep paying, and their loyalty to the product. Developer product stickiness is generally higher than the median, with Snowflake at the top of the list at 174% and Hashicorp, Gitlab, and Confluent at over 120%. + +As you can see, with such high stickiness, the faster the customer acquisition rate, the higher future revenues will be. When these tools are available as open source, they can be more quickly and widely adopted because open source lowers the barrier to trying and adopting new tools. This rapid market expansion is critical to building brand awareness and a user base. + +**Demand-side benefits** + +Open-source developer tools reduce the cost for SMEs to enter the LLM market, making it easier for them to focus more on application layer development. For SMEs, entering the market to develop large-scale models and complex systems often requires significant technical investment and financial support. Open-source developer tools lower this barrier because they are usually free or less expensive overall and contain many proven features and components. SMEs can utilize these resources to develop and test their products without creating all the essential elements from scratch. In this way, they can focus more resources and energy on application-level innovations and solutions for specific business needs rather than spend much time and money building the underlying technology. This reduces the cost of entry and speeds up product development, enabling SMEs to compete more effectively with larger firms. + +Due to the ecological effect of open-source development tools, their technology iterations usually outpace closed-source tools. In such an open-source ecosystem, the latest research results from the lab can be quickly integrated and shared, and such a mechanism ensures the rapid updating and dissemination of technology. 
Active participation in the open-source community facilitates the rapid exchange of innovative ideas and technologies, making the latest development tools and technological achievements accessible and usable by many developers. The strength of this open-source culture is that it is open and collaborative, providing developers with a quick and easy way to access and utilize state-of-the-art tools. It not only accelerates the development of technology but also offers individual developers or small teams the opportunity to compete with large corporations, promoting the healthy development and innovation of the entire technology sector. + +#### 2.3.3 Open-source developer tools need to emphasize ecological construction. + +**Making developer tools open source requires technical support to maintain a stable community ecosystem** + +Open-source development tools rely on the support and maintenance provided by the community and partners. This is essential to ensure the stability and reliability of the tool. For example, the success of an open-source database management system depends not only on its functionality but also on the community's ability to respond to user-reported problems and provide fixes promptly. At the same time, market feedback from partners and users in the ecosystem is critical to optimizing open-source development tools. If an open-source code analysis tool is widely used in an enterprise environment, the feedback from those enterprise users will directly influence the future direction of the tool. This feedback helps developers understand which features are most popular and which need improvement to tailor the tool to market needs. + +**Open source developer tools need to complement the strengths of cloud vendors to expand market reach and user base** + +The developer tools themselves are to be deployed based on the platform provided by the cloud vendor, whose strength lies in its specialization and technical strength. 
In contrast, the cloud vendor's advantage lies in delivering the just-needed computational platform and its broader user base. The two collaborate on developer tools, and developers can leverage cloud vendors to offer better computational power deals to attract more users while benefiting from the cloud vendors' own sales channels to gain more substantial end-to-end reach. This virtuous cycle helps to extend open-source development tools to a broader user base. This increases the tool's visibility and provides more opportunities for its practical application and improvement. More users means more feedback, which promotes continuous tool optimization and adaptation to changing market needs. + +MongoDB, for example, started its cloud transformation early by launching Atlas, a SaaS service. Even though Atlas accounted for only 1% of total revenue when MongoDB went public in 2017, when MongoDB had already built all of its systems based on the Open Core model, MongoDB still spent a lot of resources on building SaaS-related products and marketing systems. Since then, Atlas's revenue has increased at a compound annual growth rate of more than 40%. In contrast, its competitor, CouchBase, has relied too heavily on its traditional model and has spent a lot of effort on mobile platform support services. This slow-growing market has dragged the company into a quagmire. SaaS service-based product systems are essential for developer tools vendors today and must emphasize cooperation with cloud vendors.
+ +
+ +| ![image020](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-20.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.20 MongoDB Sales Revenue by Product
+
+ +**Establishing an ecology conducive to building open source industry standards** + +Developer tools, as the underlying tool layer, are decisive for the principal architecture of the upper model development. Collaboration with partners such as cloud vendors, open-source model vendors, and others helps to build consensus and establish industry standards, which is critical to ensuring interoperability, compatibility, and consistency of user experience with development tools. Standardization reduces compatibility issues and enables easier integration and use of different products and services. For example, MongoDB leverages the community to form the industry standard for NoSQL DBMS. This active community not only brought high-quality, low-cost licenses to the early commercial versions of MongoDB but also served as the basis for the later Atlas (managed service). Based on the collaboration of the open-source community, Milvus launched Vector DB Bench (which can measure the performance of vector databases through the measurement of key metrics, allowing vector databases to maximize their potential), thus gradually establishing an industry standard for vector databases, and facilitating the selection of vector databases tailored to the needs of users.
+ +
+ +| ![image021](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-21.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.21 Vector database evaluation results
+
+ +#### 2.3.4 Exploring the Commercialization Path of Open Source Developer Tools + +The commercialization dimension of AI developer tools can draw on traditional software developer tools; the overall commercialization is still in the early stage of exploration; based on the research and analysis of open-source developer tools that have attempted commercialization, we found that there are currently the following commercial paths: + +**Cloud Hosting Managed Service - Consumption-Based Pricing** + +With the popularity of cloud computing, more and more developer tools have defaulted to serving users directly through hosted resources on the cloud. Such hosting services on the cloud can not only reduce the user's threshold for use but also directly provide the latest and most professional product services; in the absence of data, security, and privacy concerns, it is a good commercialization option for open-source AI developer tools projects. + +Under the business model of hosting services on the cloud, more and more projects are choosing Consumption-Based Pricing (CBP) with different product offerings; the pricing unit can be computational resources, data volume, number of requests, etc. + +AutoTrain by Hugging Face is a platform that automatically selects suitable models and fine-tunes them based on a user-supplied dataset. It has selectable model categories, including text categorization, text regression, entity recognition, summarization, question answering, translation, and tables. AutoTrain provides non-researchers with the ability to train high-performance NLP models and deploy them at scale quickly and efficiently. AutoTrain's pricing rules are not disclosed; rather, an estimated fee is charged before training based on the amount of training data and model variants. 
+ +Scale AI focuses on data annotation products with a simple pricing model that starts at 2 cents per image and 6 cents per annotation for Scale Image, 13 cents per frame and 3 cents per annotation for Scale Video, 5 cents per task and 3 cents per annotation for Scale Text, and 7 cents per annotation for Scale Document AI. Scale Text starts at 5 cents per task and 3 cents per entry; Scale Document begins at 2 cents per task and 7 cents per entry. In addition, there are enterprise-specific charging options based on the amount of data and services for specific enterprise-level projects. + +**Cloud Hosting Managed Service - Tiered Subscription Pricing** + +Some development tool layer projects also use Cloud Hosting Managed Services but offer subscription services yearly or monthly.
+ +
+ +| ![image022](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-22.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.22 Dify.AI subscription pricing
+
+ +The subscription business model allows different tiers to balance cost and price according to users' needs and willingness to pay. The company Dify.ai, pictured above, for example, has tiered pricing for different volumes of users: There is a free version for individual users, but given the cost overhead, there are many limitations set; for professional individual developers and small teams, there are fewer limitations for a lower price, but there is still an upper limit on usage; and for medium-sized teams, there is a higher price for a relatively complete service. + +However, Cloud Hosting Managed Services, whether per-volume pricing or tiered subscriptions, can only offer standardized product services, and the data needs to flow to the public cloud. Some large enterprises still need to privatize and customize such a business model. + +**Private Cloud / Dedicated cloud / Customized Deployment** + +While more and more projects are utilizing services hosted directly on the cloud, hosted services on the cloud are no longer an option when larger enterprises need to have more private, customized requirements. + +Usually, with such a business model, the program also offers different options to the users. The Bring Your Own Cloud (BYOC) model is prevalent in North America, while the On-Premise scenario is better suited for more data-compliance-sensitive scenarios. + +The commercialization of open source projects at the development tool level often provides a variety of options, including the above three business models. This can be interpreted as the diversity and complexity of customer demand at this level. In exploring business models, various projects are also attempting to synchronize different paths. The future direction of development is worthy of long-term sustained attention. 
+ +#### 2.3.5 Successful cases of open source on the developer's tool side + +Zilliz is a next-generation data processing and analytics platform for AI that provides the underlying technology for application-oriented enterprises. Zilliz developed Mega, a GPU-accelerated AI data middleware solution, which includes MegaETL for data ETL, MegaWise for database, MegaLearning for model training in the Hadoop ecosystem, and Milvus for feature vector retrieval. These systems can meet the traditional scenarios and needs of accelerated data ETL, accelerated data warehousing, and accelerated data analytics, as well as emerging AI application scenarios.
+ +
+ +| ![image023](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-23.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.23 Zilliz Global Users (from company website)
+
+ +Zilliz's success represents a GPU-based giant data accelerator that provides an effective solution to organizations' growing data analytics needs. Zilliz's core project, the vector similarity search engine Milvus, is the world's first GPU-accelerated massive feature vector matching and retrieval engine. Relying on GPU acceleration, Milvus provides high-speed feature vector matching and multi-dimensional data joint query (joint query of features, labels, images, videos, text, and speech) and supports automatic database sharding and multi-replicas, which can interface with AI models such as TensorFlow, PyTorch, and MXNet, enabling second-level queries for billions of feature vectors. Milvus was open-sourced on GitHub in October 2019, and the number of Stars continues to grow at a high rate, reaching 25k+ in December 2023, with a developer community of over 200 contributors and 4,000+ users. In the capital market, Zilliz received $43 million in Series B, the most significant single Series B financing for open-source infrastructure software worldwide. This indicates that investment institutions are optimistic about Zilliz's potential for future development.
+ +
+ +| ![image024](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-24.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.24 Zilliz GitHub Community Operations
+
+ +Zilliz's main product is the Vector Database, a key piece of developer tools. It is a database system specialized in storing, indexing, and querying embedded vectors. This allows LLMs to store and read knowledge bases more efficiently and fine-tune models at a much lower cost. It will also play an important role in the evolution of AI-native applications. + +Zilliz is commercialized as Zilliz Cloud, with a monthly subscription business model. It is deployed in the form of SaaS, and determines the monthly subscription fee based on the number of vectors, vector dimensions, computational unit (CU) type, and average data length. Zilliz also offers a PaaS-based proprietary deployment service for scenarios with a high focus on data privacy and compliance, which is based on customized pricing.
+ +
+ +| ![image025](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-25.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.25 Example of Zilliz Price Calculator
+ +
+ +### 2.4 Open-source tools for the AI application layer are blooming + +#### 2.4.1 Application Layer Open Source Tools Bloom + +The development of application-layer AI is like a blossoming landscape, showing a spectacular picture of technological diversity and application breadth. Nowadays, the influence of application layer AI is expanding, some of them are oriented to consumer users, providing services covering all aspects of daily life, such as entertainment, socialization, music, personal health assistant, etc.; at the same time, they also play an important role in more specialized business fields, such as market analysis, legal processing, intelligent design, etc. These applications demonstrate the depth and breadth of AI technology, which not only improves efficiency and convenience, but also promotes innovation and technological advancement to a great extent.
+ +
+ +| ![image026](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-26.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.26 A wildly diverse array of AI application layer products (source: Sequoia)
+
+ +A large number of open-source application layer products have also been born, which are mostly based on LLMs and fine-tuned with industry-specific datasets. Application layer tools customized for the industry offer better performance than the generic LLMs, and the open-source nature helps business and consumer users using these applications to further customize their development to better fit their needs. + +Open-source tools at the application layer facilitate integration across disciplines and industries. For example, industries such as medicine, finance, education, and retail are utilizing open-source AI tools to solve industry-specific problems, driving the adoption of the technology across all sectors. Open-source tools encourage experimentation and innovation due to low cost and low risk. Developers are free to experiment with new ideas and technologies, and this spirit of experimentation has greatly contributed to the application layer boom.
+ +
+ +| ![image027](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-27.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.27 Mapping of open-source tools for application testing (with examples of selected products in each domain)
+
+ +#### 2.4.2 Drivers of open source at the application layer + +**Open-source application layer products have a low threshold for use and are more easily accepted by users** + +Application layer open-source tools are less expensive and more in line with the low willingness of domestic enterprises to pay. According to iResearch, domestic enterprises are not professional enough in their internal management processes, have low recognition of the value of software, and are more willing to pay for manpower. Vendors need to take an indirect route to educate companies, give them time to accept the product, and gradually unleash the demand side. Based on the above background, open-source tools meet the needs of these markets with their low-cost features, making organizations more willing to try and adopt these tools. For domestic companies with limited budgets, low cost is a significant advantage. Low- or no-cost features allow these organizations to access and use advanced technology tools without additional financial burden. + +At the same time, the low-cost nature of open-source tools encourages companies to make long-term investments. Firms can build and expand their technology infrastructure over time without taking on significant financial risk. With the deepening of the enterprise's understanding of open-source products and the deepening of the degree of dependence, open-source products can gradually consider providing value-added services content, so as to achieve the purpose of long-term customer acquisition. + +At the same time, open-source products are conducive to achieving seamless integration with other systems to enhance the user experience. A distinguishing feature of open-source application layer products is that they are often highly flexible and customizable. This allows users to modify and adapt them to their specific needs. 
This means that open-source products can be customized to better fit existing systems and workflows for seamless integration with other systems. Many open-source projects follow industry standards, which helps ensure compatibility between different systems and components. Standardization promotes interoperability between different software products and simplifies the integration process, thereby improving the overall user experience. Open-source communities are typically made up of developers and users from around the world who work together to improve products and provide support. This collaborative spirit not only fosters continuous improvement of the product, but also provides a resource for solving problems that may be encountered during the integration process. + +**Open-source application layer products can receive contributions from the community to facilitate technology iteration and broaden the application scenarios** + +Application layer open-source can receive strong support from community development forces. As the application scenarios are more diverse and decentralized, the needs of different sub-scenarios are more differentiated, and the expertise of contributors to the corresponding scenarios is more demanding. Stable Diffusion (SD) is an open-source text-to-image application that, with the power of the community, has been rapidly catching up in terms of performance since its release and in some ways surpassing the closed-source text-to-image application Midjourney. While there are some inconveniences when using Stable Diffusion, users have access to hundreds of LoRAs, fine-tuning settings, and text embeds from the community. For example, users of Stable Diffusion found it to be limited in its ability to process hand images. In response, the community reacted quickly and within the next few weeks a LoRA fix was developed specifically for the hand image issue. 
This timely and professional feedback from the community greatly contributes to the rapid advancement and improvement of application layer open-source tools. + +Open-source products, due to a lower barrier to use, may be adopted by users from different industries and backgrounds in a variety of environments and contexts as soon as they are released. These application scenarios may go far beyond the developer's initial design and imagination. When products are used in these diverse scenarios, they may reveal new potential or needs, revealing previously unnoticed usage scenarios. This can provide product developers with valuable insights into how their products are performing in real-world use and potential room for improvement. Faced with these newly discovered usage scenarios, developers have the opportunity to innovate and improve. They can add new features, optimize existing features, or redesign products to better meet these needs based on actual user experience in different environments. This iteration, based on real-world use cases, is a key driver for the continued progress of open-source products. + +**Application layer open-source products have Product-Led Growth (PLG) model features that can drive paid conversions** + +The PLG model focuses on customer acquisition through a bottom-up sales model, where the product is at the center of the entire sales process. The PLG model's growth flywheel has three main phases: Acquisition, Conversion, and Retention. In all three phases, open-source has advantages that distinguish it from traditional business models. + +In the customer acquisition phase, the open-source operating model reduces the cost of customer acquisition and makes the customer acquisition process more targeted. The interactions between developers and the community-based collaboration brought about by platforms such as GitHub accelerate the spread of customer acquisition. 
The initial customer orientation of open-source products is usually participants in the open-source community, often developers or IT staff in the organization. By nurturing these quality prospects, you also have a "mass base". Communities help open up the boundaries of the enterprise and make word-of-mouth spreading of good open-source projects and products possible. Users spontaneously download and use it in order to solve their own problems and pain points. At this point, open-source software products are not just used as a way to solve user problems through functionality, but can also be a vehicle to help organizations spread and grow. In the long run, it will be possible to reduce the cost of customer acquisition for your organization, allowing for more and more automated customer acquisition and lowering expenses on the sales side. + +At the conversion stage, open-source software tends to have a higher paid conversion rate compared to traditional commercial software. On the one hand, when the user has used the free version of the software, as long as the software's functions can well meet the user's needs, it can be converted into a paid conversion at the speed of a shorter cycle, and make it a long-term user. On the other hand, companies can conduct targeted conversion follow-up and up-selling by observing users' behavior with the free version of the software, for example, by providing the sales team with a list of customers who have exceeded their usage limits and are ready to pay. In addition to traditional sales conversions, conversions can also be made through self-service buying paths (Self-service selling), which largely reduces the cost of sales. + +In the retention phase, open-source software allows users to avoid the risk of vendor lock-in, making them willing to engage in long-term use. 
Based on the same open-source project, there may be multiple vendors downstream that offer software with similar functionality, and the choice of vendor can be changed at a relatively small cost, so users can be confident in their choice of software for the long term. On the contrary, when a customer uses a closed-source product, if he/she wants to switch to another software after a period of time, he/she needs to redeploy hardware, data, etc., resulting in a significant transfer cost. Thus when users choose to use closed-source software, they may abandon their continued use of the software because the software's later development does not meet their needs or the cost of transferring it is too high.
+ +
+ +| ![image028](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_2/2-28.png) | +| --------------------------------------------------------- | + +
+ +
Figure 2.28 Application Layer Open-source Growth Flywheel
+
+ +#### 2.4.3 Market Status of LLM Application Layer Open Source + +**Internet giants and startups working together** + +There are opportunities for both Internet giants and startups to participate and compete in the LLM application layer open-source market. This is due to several factors: 1) The lowered technology barrier. The open-sourcing of the modeling layer and developer tools layer lowers the threshold of technology acquisition and application. Instead of having to develop complex LLM algorithms from scratch, startups can utilize open-source models and tools to develop solutions that meet specific needs. 2) Cost-effectiveness. Open-source models often do not require costly licenses or API fees, which is especially beneficial for SMEs with relatively limited capital. 3) Innovation and flexibility. Startups are often able to adapt more quickly to market changes and innovate for specific market segments or application scenarios. + +At present, the Internet giants are mainly based on the LLM, on which they extend a series of vertical applications. For example, Ali's Tongyi Qianwen recently released Tongyi Qianwen 2.0 and derived eight applications based on it: Tongyi Tingwu (speech recognition), Tongyi Xiaomi (improving customer service efficiency), Tongyi Zhiwen (understanding text), Tongyi Stardust (personalized roles), Tongyi Lingma (assisted programming), Tongyi Farui (legal industry), Tongyi Renxin (pharmaceutical industry), and Tongyi Dianjin (financial industry). + +Startups mainly choose a certain niche industry for deep cultivation, such as Langboat Technology's self-developed LLM focusing on marketing, finance, cultural creativity and other scenarios; XrayGPT focusing on medical radiology image analysis; Finchat focusing on financial field models, etc. 
Yunqi Partners has supported two open-source application layer startups this year, TabbyML, a tool to aid programming, and Realchar, an AI personal assistant that allows for real-time customization, both of which have quickly amassed a large number of users on GitHub. + +**Competitive landscapes in Business-end and Consumer-end are different** + +There are significant differences in the competitive landscape between the business-end and consumer-end of the open-source market for LLM application layers: + +- **To-Business Markets**: Enterprise-oriented applications are typically focused on improving efficiency, reducing costs, and enhancing decision-making capabilities. In this area, open-source LLMs can be used to automate processes, data analytics, customer service optimization, and more. The competition here focuses more on the practicality of the technology and the ability to customize it. +- **To-Consumer Markets**: Consumer-oriented applications are more focused on user experience, interactivity and ease of use. This includes personalized recommendations, virtual assistants, entertainment and social media apps, and more. Competition in the consumer market is more about innovative user interfaces and new features that appeal to users. + +**Large number of sub-scenarios still belong to the blue ocean market, no obvious leader** + +As technology evolves, market demand for AI applications becomes more segmented. For example, in industries such as healthcare, law, finance, and education, each field has its own unique needs and challenges. These market segments offer a great deal of opportunity, but also require targeted solutions. There are a number of relevant applications emerging in each of these areas, but most are at the start-up stage and have yet to produce a headline application. And because there are so many segments of the industry, there is not much competition, making it a better opportunity to get in. 
In these blue ocean markets, no clear market leader has yet formed due to the novelty and constant evolution of the market. This provides opportunities for new entrants and innovators to capture market share through unique solutions or innovative business models. + +**Expect innovative applications to emerge based on the new capabilities of LLMs** + +Although significant progress has been made in LLM technology, its deep integration and innovative application in specific application areas are still in their infancy. This means that there is plenty of room to explore and implement new ways of applying it in many sub-scenarios. With the rapid development of large-scale AI models, we are ushering in a new era of potential and innovation. These models will not only optimize and improve existing technology applications, but more importantly, they will be pioneers in leading completely new markets and application areas. In a future full of unknowns and surprises, we can look forward to the emergence of a huge variety of powerful new applications that will be integrated into our daily lives in unprecedented ways. These emerging markets and applications will open a window into never-before-seen possibilities for far-reaching social and cultural change. They will stimulate human creativity and imagination, pushing us to break through existing technological boundaries and explore a wider world. + +In this dynamic and innovative era, we will witness the seamless integration of technology into our daily lives and experience the convenience and efficiency that come with intelligence. The synergy between humans and machines will open up new modes of cooperation and innovation, leading us to a smarter, more efficient, and more personalized future. It's a time of great anticipation, with every step of technological advancement building a more exciting, rich and diverse world for us. 
In this new era, we will witness and create unprecedented miracles together, and explore together the infinite possibilities of the common development of science and technology and mankind. + +### 2.5 The commercialization of open-source LLMs is encountering difficulties + +#### 2.5.1 Technology is evolving at a rapid pace and open-source projects need to be continuously iterated to remain competitive + +In the field of artificial intelligence and LLMs, technology is evolving at an extremely fast pace, with new algorithms, data processing techniques, optimization methods, and computational architectures continuing to emerge. For open-source projects, this means that constant updates and upgrades are needed to keep the technology current and effective. This need for continuous updating is a challenge in terms of resources and time. For open-source projects, especially those with relatively limited financial and human resources, it can be challenging to keep up with this rapid pace of technology iteration. This means that not only do they have to race against the clock, but they also face stiff competition from commercial companies and other open-source projects. If a project is not kept up to date to reflect the latest technological advances, it can quickly become obsolete and thus lose the interest and support of users and community members. + +In the face of well-funded companies from some tech giants such as OpenAI and Ali, some of the LLMs that small and medium-sized companies have spent a lot of money on developing could be quickly surpassed, leading to a serious funding gap. A "cash-burning" strategy that small and medium-sized companies can't afford is possible for large vendors, which could potentially dampen the current flourishing ("hundred flowers blooming") LLM market and reduce its diversity. 
+ +#### 2.5.2 Difficulty in defining the scope of plagiarism / inspiration + +The original intention of open-source LLMs was to allow more users to access and use LLMs, but in the process of using them, disputes often arise over code attribution, licenses, and many other issues. Because open-source LLMs are a relatively new concept, the relevant legal and regulatory system is not yet mature, and many disputes also involve cross-border issues, there is no clear definition of the boundary between plagiarizing an LLM and borrowing from it. The recent Zero One Everything issue regarding Llama's "Shell Controversy" has generated a lot of attention. At the heart of the disagreement, but not the final judgment, is the difficulty of defining the scope of plagiarism / borrowing. + +Some argue that Zero One Everything's software uses Llama's source code without attributing it, making it appear that they developed that part of the content themselves, and is indeed suspected of violating the right of attribution, i.e., suspected of plagiarism. However, there is also the view that the structural design of the Zero One Everything LLM is based on a mature structure that draws on the publicly available results of the industry's top level. Since the development of the LLM technology is still at a very early stage, keeping the structure consistent with the industry's mainstream will be more conducive to the overall adaptation and future iteration. Meanwhile, the Zero One Everything team has done a lot of work on understanding models and training, and is also continuing to explore breakthroughs in the nature of the structural level of models. + +This identification becomes even more complex in a context where LLM technologies are still in their infancy and laws and regulations are not yet perfect. 
We should recognize that, with the continuous evolution of technology and the improvement of the legal system, how to balance the protection of innovation and the promotion of cooperation will be a process that needs to be continuously explored and improved. Ultimately, this is not only a legal and technical issue, but also an ethical and moral issue that concerns the healthy development of the entire industry. + +#### 2.5.3 Difficulty for community participants to provide direct contributions to model iterations + +In the process of building and iterating large-scale AI models, participants in the eco-community face a notable challenge: due to the complexity of model training, it is often difficult for them to contribute directly to the development of the models. These LLMs, such as Llama or other advanced machine learning models, typically require highly specialized technical knowledge and resources, including large-scale data-processing capabilities, deep algorithmic understanding, and expensive hardware resources. For ordinary community members, these demands are often beyond their means. + +As a result, while community members may be enthusiastic and willing to participate, they are limited in their ability to substantially iterate on the model. This lack of expertise means that even the most active community members may only be able to play a role in relatively peripheral areas such as model application, feedback collection, or elementary debugging. This limitation not only affects the extent to which the community contributes to the development of the model, but may also lead to a weakened sense of community involvement and belonging during the model development process. Finding appropriate ways to enable a wider range of community participants to contribute their wisdom and efforts effectively is therefore an important topic in the development of the LLM. 
+ +#### 2.5.4 Rapid development of open source technology and high cost of late updates + +One of the main advantages of open-source software is that it reduces the initial cost to the user. Enterprises can acquire and use open-source LLMs without paying expensive license fees. This is especially attractive to small businesses or startups with limited budgets, as they can utilize advanced technology without a significant financial burden. While open-source software saves money in the initial phase, they can come with higher update costs over the long run. + +Open-source projects are often known for their speed of innovation and community-driven dynamism, which drives technology to progress and evolve. However, as technology rapidly updates and iterates, the cost of maintaining and upgrading existing systems increases. Such costs include not only direct financial inputs, such as hardware upgrades or the purchase of new services, but also indirect costs, such as training staff to adapt to new technologies and the time and labor involved in migrating existing systems to newer versions. Especially for long-term projects, it becomes especially challenging to keep up with the latest open-source technologies. Every major update or technology transition can involve complex adaptation efforts and compatibility testing that require significant human and technical resources. In addition, frequent updates may lead to system stability and security issues, increasing potential operational risks. + +Therefore, while open-source technologies offer great advantages in terms of innovation and flexibility, organizations and developers must carefully consider the update costs associated with adopting and maintaining these technologies, and how to find a balance between continuous innovation and cost-effectiveness. 
+ +Although open-source LLMs currently face numerous challenges, such as the rapid development of technology iterations, the risk of plagiarism, the limitations of community contributions, and the increased cost of maintenance, their future remains promising. Open-source LLMs have shown great potential to drive technological innovation, facilitate knowledge sharing, and accelerate R&D processes. In order to realize these potentials and overcome current challenges, a concerted effort by all parties from different fields and backgrounds is required! + +## [3. Open source security challenges](https://hackmd.io/DAH6W1DsQK2xlfd4DoFfHA#3-Open-source-security-challenges) + +## [4. Capital market situation for open source projects](https://hackmd.io/DAH6W1DsQK2xlfd4DoFfHA#4-Capital-market-situation-for-open-source-projects) + + + +## 3. Open source security challenges + +Security is an important factor in determining whether an open-source product can be successfully commercialized. Business users usually need to conduct a comprehensive security assessment of the products they use to ensure that the overall business is secure and controllable, which includes cyber-attack security, data security, and commercial license controllability. + +According to Synopsys, by the end of 2022, 84% of codebases contained at least one known open-source vulnerability, 48% contained high-risk vulnerabilities, and 34% of respondents also said they had experienced "an attack launched using a known vulnerability in open-source software in the past 12 months." Open-source security is an issue that requires a great deal of attention, and it greatly affects customer trust in open-source software, as well as whether the large open-source ecosystem can be stabilized in the future. Only by ensuring security can open-source software go farther on the road to commercialization.
+ +
+ +| ![image029](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-1.png) | +| -------------------------------------------------------- | + +
+ +
Figure 3.1 Open Source Codebase Vulnerabilities (Data Source: Synopsys)
+
+ +### 3.1 Open source software cybersecurity + +#### 3.1.1 Open source software security vulnerabilities can be exploited with serious consequences + +Open-source software plays a key role in driving technological innovation and facilitating knowledge sharing, but it is also inherently at risk of security vulnerabilities. The root causes of these security vulnerabilities usually lie in open-source code management and maintenance issues, such as programming errors, lack of continuous security reviews, and lagging application of updates and patches. Particularly where projects are not active enough or lack effective regulation, these vulnerabilities may go unrecognized or unfixed for long periods of time. Historically, several serious security incidents have occurred due to security vulnerabilities in open-source software, resulting in sensitive data breaches and financial losses. + +In April 2014, a major security vulnerability in the widely used open-source component OpenSSL, known as Heartbleed, emerged. This vulnerability had existed since the May 2012 release and allowed an attacker to obtain data containing certificate private keys, usernames, passwords, email addresses, and other sensitive information. Because this vulnerability went undetected for nearly two years, its impact was extremely widespread and almost impossible to accurately measure. Similarly, in December 2021, another widely used open-source component, Apache Log4j2, was found to have a serious remote code execution vulnerability called Log4Shell. This vulnerability quickly spread globally due to the high performance and low exploitation barrier of Apache Log4j2, affecting a number of well-known companies and service platforms, including Steam, Twitter, Amazon, and others. 
+ +#### 3.1.2 The relative prevalence of open source software cybersecurity issues + +**Open source software is inherently more vulnerable** + +According to the results of "2022 [QiAnXin QAX](https://en.qianxin.com/) Open-source Project Inspection Program", the overall defect density of open-source software is 21.06/thousand lines, and the density of high-risk defects is 1.29/thousand lines. The number of defect densities and high-risk defect densities has been increasing for three consecutive years, with an accelerating trend. The overall detection rate of the ten categories of typical defects in open-source software was 72.3%, while this figure was only 56.3% two years ago. There is a rapid increase in the detection rate of open-source software, suggesting the security issue of the software itself is quite serious. + +
+ +
+ +| ![image030](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-2.png) | +| -------------------------------------------------------- | + +
+ +
Figure 3.2 Three-Year Comparison of Average Defect Density of Open Source Software
+
(Source: 2023 China Software Supply Chain Security Analysis Report)
+ +
+ +In terms of the absolute number of open-source software flaws and vulnerabilities, according to data from [QiAnXin (QAX)](https://en.qianxin.com/), by the end of 2022, 57,610 vulnerabilities related to open-source software had been included in the public vulnerability database, and 7,682 new vulnerabilities were added in 2022, an incremental increase of about 15%, which is a worrisome situation. + +:::info Expert Review +**Yu Jie**: The security of open-source software urgently needs to be given sufficient attention, and it is clear that the strength of individual communities alone is not enough to deal with it. How to build effective systems and regimes to comprehensively protect the security of open-source software has become a major issue that cannot be avoided with its rapid development. +::: + +**Open-source projects with too low or too high levels of activity are more likely to have security risks** + +Open-source software that is too inactive and updated too infrequently will result in vulnerabilities not being fixed in a timely manner, thus increasing the risk exposure of the software; if it is too active and updated too quickly, it will also result in users not being able to update accordingly in a timely manner, which puts more pressure on security operations and maintenance. + +According to the data of QAX, if the open-source projects that have not been updated for more than a year are regarded as inactive projects, the number of inactive open-source projects in the mainstream open-source software package system was 3,967,204 in 2022, accounting for 72.1%, while this ratio was 69.9% and 61.6% in 2021 and 2020, respectively, which indicates that the overall motivation of the open-source authors to maintain the software has decreased, which is not favorable to the long-term development of the security of the open-source software ecosystem. +
+ +
+ +| ![image031](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-3.png) | +| -------------------------------------------------------- | + +
+ +
Figure 3.3 Statistics of Inactive Open Source Projects
+
+ +Against the backdrop of generally low activity, there are also some open-source projects that are overly active, again putting a lot of security O&M pressure on users. According to QAX, there were 22,403 open-source projects with more than 100 versions in the mainstream open-source package ecosystem in 2022, compared to 19,265 and 13,411 in 2021 and 2020, respectively. +
+ +
+ +| ![image032](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-4.png)| +| -------- | + +
+ +
Figure 3.4 Extremely Active Program Statistics
+
+ +Too little or too much activity poses a high security risk to users of the open-source ecosystem, and a balance is urgently needed to ensure the healthy and sustainable development of open-source software. A more scientific version management and release mechanism is needed to ensure that updates respond to security and functionality needs in a timely manner without disturbing users too frequently. For projects with insufficient activity, their activity can be enhanced by increasing community participation and providing incentives. For projects with frequent updates, more attention should be paid to communicating with users, providing clear update logs and support guidelines to help users better understand and adapt to these changes. + +At the same time, users should also be encouraged to actively participate in the feedback and contribution of the open-source project to form a positive interaction. Users' actual experience and feedback are important references for adjusting the update pace and optimizing software functions. By establishing a healthy user-developer interaction mechanism, we can effectively balance the activity and update frequency to ensure the safety and usability of the software. + +**Some users are using outdated software or a disorganized mix of versions** + +According to QAX, many software projects use very outdated versions of open-source software, even versions released nearly 30 years ago, with many vulnerabilities and very high risk exposure. One of the oldest is IJG JPEG 6, released in 1995, which is still used by many projects. Older versions often come with older vulnerabilities, and there are still very old open-source vulnerabilities in some software projects. The oldest vulnerability is from 2002, 21 years ago, and is still present in 11 projects. +
+ +
+ +| ![image033](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-5.png) | +|-------------------------------------------------------| + +
+ +
Figure 3.5 Aged Open Source Vulnerabilities and Their Usage
+
+ +There is a lot of confusion over the use of versions of open-source software, not all of which are up-to-date. For example, there are 181 versions of Spring Framework in use. The use of earlier versions means that a large number of vulnerabilities that have been fixed in newer versions can still be exploited maliciously, thus posing a significant security risk. + +#### 3.1.3 Strategies for dealing with vulnerability risks in open source software + +**Regular security audits and code checks** + +A clear audit process needs to be defined that includes a comprehensive review of the overall architecture, codebase, and dependencies of the software. These audits can be performed by assembling specialized security teams or utilizing third-party security services. These teams or service providers should have an in-depth understanding of open-source software. + +Regular code review meetings should also be held to encourage team members to review each other's code, which not only helps identify potential security issues, but also improves the team's programming skills and code quality. Audits and code review should be a continuous process, constantly monitoring and updating the code base in response to newly discovered vulnerabilities and security threats. + +**Using the SCA (Software Composition Analysis) tool** + +Software Composition Analysis (SCA) is a methodology for managing the security of open-source components, enabling development teams to quickly track and analyze the open-source components used in their projects. SCA tools identify all relevant components and supporting libraries, as well as direct and indirect dependencies between them. In addition, they can check software licenses, identify deprecated dependencies, and discover potential vulnerabilities and threats. An SCA scan produces a software bill of materials (SBOM) that contains a complete list of the project's software assets. 
+ +With the widespread use of open-source components in software development, SCA is emerging as a key component of application security, although the concept itself is not new. The number of SCA tools has grown with its importance. In modern software development practices, including DevSecOps, SCA not only needs to provide ease of use for developers, but also needs to guide and direct developers safely throughout the software development lifecycle (SDLC). + +When using SCA for open-source security, the following points should be considered: + +- Adopt developer-friendly SCA tools: Developers are often busy writing and optimizing code, and they need tools that promote efficient thinking and rapid iteration. Unfriendly SCA tools can slow down the development process. An easy-to-use SCA tool simplifies setup and operation. Such tools should integrate easily with existing development workflows and tools, and should be implemented early in the software development life cycle (SDLC). It is important that developers understand the importance of SCA and incorporate its security checking process into their daily work to minimize code rewrites due to security issues. +- Integrate SCA into the CI/CD process: Using SCA tools does not mean that they will interfere with the development, testing, and production processes. Instead, organizations should integrate SCA scanning into Continuous Integration/Continuous Deployment (CI/CD) processes so that vulnerabilities can be identified and remediated as a functional part of the software development and build process. This approach also helps developers make code security part of their daily workflow. +- Effective Use of Reports and Software Bills of Materials: Many organizations, including the U.S. Federal Government, require a software bill of materials (SBOM) when purchasing software. Providing a detailed SBOM means that organizations recognize the importance of keeping track of every component within an application. 
Clear security scanning and remediation reports are also critical, as they provide detailed information about an organization's security practices and the number of vulnerabilities remediated, demonstrating a commitment to and actual action on software security. + +**Enhancing education and training** + +Conduct regular security awareness training for developers to increase their knowledge of security threats and best security practices, including educating them on identifying common security vulnerabilities and attack tactics. Use hands-on simulation exercises and workshops to allow developers to learn how to handle security incidents in a secure environment. These exercises can include vulnerability mining, code remediation, and security testing. + +Given the rapid changes in the security landscape, encourage developers to continuously learn and update their knowledge, including by participating in online courses, seminars and industry conferences. Create a platform, such as an internal forum or regular meetings, for developers to share their knowledge and experience in security to foster learning and collaboration among teams. + + +### 3.2 Controllable open source licences + +#### 3.2.1 Open source licenses are a constraint on users of open source resources, with a wide range of categories + +An open-source license is a binding for open-source resources (including, but not limited to, software, code, and web users). Based on the open-source license, the user gets the right to use, modify and share the open-source resources. If the software is not licensed, it means that the copyright is retained and the user can only view the source code and not use it. 
Therefore, an open-source license is essentially a legal permit that protects project contributors and users of open-source resources, ensures that contributors can open-source the resources they own in the way they want to, and also ensures that users can use the resources in a reasonable and legal way to avoid being caught in intellectual property disputes, which greatly contributes to the prosperity of the open-source community. + +Open-source licenses are divided into three overall categories based on how restrictive the license is: Permissive, Weak Copyleft, and Strong Copyleft.
+ +
+ +| ![image034](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-6.png) | +| -------------------------------------------------------- | + +
+ +
Figure 3.6 Open Source License Classification
+
+ +**The Permissive category** is the most flexible category of licenses, including BSD, MIT, Apache, ISC, etc., which provide extremely permissive licensing conditions that allow people to freely use, modify, copy, and distribute the software. They equally support the use of software for commercial or non-commercial purposes. The only requirement is that the appropriate license text and copyright information be included in each copy of the software. + +**The Weak Copyleft category** is a more restrictive license than the Permissive category, including LGPL, MPL, etc., which requires that any changes made to the code be released under the same license. Also, the modified code must contain the license and copyright information of the original code. However, they do not mandate that the entire project be released under the same license. + +**The Strong Copyleft category** is an even more restrictive type of license, including GPL, AGPL, CPL, etc. This type of license states that the entire project must be released under the same license, including those cases where only a portion of the software is used. In addition, these licenses require that all modified versions of the code be publicly released. + +Under these broad categories, specific licenses and license families will have unique restrictions, permissions, and specific differences in additional parameters, and the overall logical relationship of licenses is organized as follows:
+ +
+ +| ![image035](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-7.png) | +| -------------------------------------------------------- | + +
+ +
Figure 3.7 License Logic Relationships
+
+ +Kaiyuanshe provides an open-source license filter that helps readers find the most suitable license options faster and more easily; it is highly recommended for readers who need it: https://kaiyuanshe.cn/tool/license-filter + +#### 3.2.2 Risk of infringement by using open source resources without complying with the license + +**Open source license infringement** + +"Open-source license infringement" is the use of open-source software without complying with the terms and conditions of the open-source license associated with the software, thereby violating the legal constraints imposed by the license. Such behavior can lead to a host of legal and ethical problems. While open-source software is freely available to the public for use and modification, such use and modification is still subject to certain limitations, which are specified by the corresponding open-source license. + +Specific instances include, but are not limited to, the following: + +Ignoring Copyright Notices and Attribution: Many open-source licenses require that original copyright notices and author attributions be retained when copying, distributing, or modifying software. Ignoring this requirement, such as removing the original author's copyright information or failing to properly attribute the work, is considered an infringement. + +Non-availability of Source Code: Some licenses, such as the GPL (General Public License), require that the source code be made available along with the distribution of the software. If a piece of software based on such a license is distributed without the source code being made available at the same time, this also constitutes infringement. + +Restrictive Use: Some licenses have restrictions on the scenarios in which the software can be used. For example, certain licenses may prohibit the use of the software in certain types of business activities. Violation of these restrictive covenants is also a tort. 
+ +Violating Conditions for Distribution and Re-licensing: Copyleft open-source licenses such as the GPL require that any modifications and derivative works based on GPL-licensed software also be released under the GPL license. Violations of this rule, such as privatizing GPL code or distributing derivative works under non-GPL licenses, constitute copyright infringement. + +Violation of Specific Terms: In addition to the common scenarios described above, there are specific license terms that may be violated under certain circumstances. This depends on the specific requirements of the particular license. + +**License Reciprocity Requirement Leads to Expanded Scope of Open Source Copyright Problems** + +The so-called "reciprocity requirement" of an open-source license, i.e., whether a derivative work follows the license of the original work, refers to the fact that the terms and conditions of an open-source license tend to continue to apply during the process of open sourcing the software, which includes copying, modifying, manipulating, redistributing, and displaying. The permissions and limitations of such licenses can extend vertically to derivative works and modified versions based on the original software development, and even horizontally affect other parts of the software developed based on such open-source software. + +Of the many open-source licenses, the GPL has the strongest reciprocity requirements and the most lawsuits associated with it. The main reason for this is: any derivative software based on GPL code modifications needs to be open source. If a piece of software contains GPL code, even if it is only a portion, the software as a whole is usually required to be open-source (unless it meets the terms of a specific exception). Failure to open-source portions of proprietary software affected by the GPL may result in infringement by the user in violation of the obligations of the GPL license. 
Moreover, the GPL is extremely complex, containing 17 terms. It has more stringent requirements for users, and once these requirements are violated, the user's license agreement is terminated and continued use of GPL-licensed open-source software may constitute copyright infringement.
+ +
+ +| ![image036](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-8.png) | +| -------------------------------------------------------- | + +
+ +
Figure 3.8 GPL License Related Litigation
+
+ +**Infringement of open source licenses may lead to serious consequences** + +Once the use of open-source software is found to infringe its license, the loss to the defendant company or individual goes far beyond compensation payments and also includes a series of issues such as damage to reputation and partnerships: + +Lawsuits and Fines: In 2017, Versata Software sued Ameriprise Financial for violating Versata's patents. While this is not a pure case of open-source license infringement, it involves software licensing and copyright issues. The case eventually ended in a settlement, but the legal fees and time costs involved were prohibitive. + +Enforcing Compliance with License Requirements: A famous case is the 2015 VMware vs. Hellwig case. Hellwig, a Linux kernel developer, accused VMware of using GPL-based Linux code in its ESXi products without following the open-source requirements of the GPL license. Although the court did not ultimately rule in Hellwig's favor, the case sparked a broader discussion about GPL license obligations and derivative works. + +Reputational Damage: Red Hat filed a lawsuit against Speakeasy, Inc. in 2004 for allegedly failing to comply with the requirements of the GPL license. Despite the settlement of the case, Speakeasy's reputation has suffered, especially in the open-source community. + +Business Impact: Cisco was sued by the Free Software Foundation (FSF) in 2008 for violating the GPL license for its Linksys products. Cisco ultimately agreed to comply with the GPL license and pay an undisclosed amount as a donation. The lawsuit led Cisco to reconsider its open-source strategy for its products. + +Partnership Damage: If a company is found to be in violation of an open-source license, its business partners may reevaluate their relationship with the company, especially if the collaborative project involves open-source software. 
+ +#### 3.2.3 Open source large model licenses are largely distinct from traditional licenses + +As open-source LLMs are still evolving and iterating, two highly influential open-source LLMs of the year, Llama2 and Falcon, have both been questioned as to whether or not they are truly "open source" due to tweaks to the terms of their open-source licenses. Neither uses a commercially available license; instead they use their own "LLAMA 2 COMMUNITY LICENSE AGREEMENT" and "TII Falcon LLM License", respectively, and both impose additional restrictions on commercial use. + +**Difference in open source licenses for LLaMA2** + +Much of the discussion of Llama2's violation of open-source guidelines comes from its more unique terms: + +- The Llama2 open-source model may not be used in products or service platforms with more than 700 million monthly active users (MAU), unless approved and licensed by Meta; +- The Llama2 open-source model may not be used in any manner that violates applicable laws or regulations, including trade compliance laws. It is also not applicable to use in languages other than English; +- Llama2 and its outputs may not be used to improve other LLMs (not including Llama2 or its derivatives). + +The Open Source Initiative (OSI) has published a ten-criterion definition of open source, which is currently recognized internationally, and the Llama2 license conflicts with two of the criteria: + +- Non-Discrimination Against Individuals or Groups: The Llama License prevents enterprise users with more than 700 million monthly users from obtaining licenses directly through this License. +- Non-Discrimination Against Fields: The license shall not restrict anyone from using the program in a particular field. The Llama License prohibits the use of Llama2 outputs to improve other AI LLMs, which would be a restriction on the domain of use. Llama2's language restrictions also lead to limitations in the use of Chinese language domains. 
+ +**Difference in open source licenses for Falcon** + +The TII Falcon LLM License makes some key changes from the Apache License. The Apache License is a popular open-source license that is friendly to commercial use and allows users to distribute or sell their modified code as an open-source or commercial product after meeting certain conditions. + +Falcon's license is similar to the Apache License in that it also provides broad permissions to use, modify, and distribute the licensed work, requires that the license text be included in the distribution and that the work be properly attributed, and contains a disclaimer of warranties and a limitation of liability. + +However, the TII Falcon LLM License introduces additional commercial use terms that require commercial applications to pay a 10% license fee on annual revenues in excess of $1 million. It also places additional restrictions on the manner in which the work may be published or distributed, such as emphasizing the need for attribution to "Falcon LLM technology from the Technology Innovation Institute." + +**The purpose of open-sourcing LLMs is different from that of traditional open-source software** + +In the case of Llama2, for example, the license is essentially a guiding framework for organizations that intend to develop and deploy AI systems while adhering to Meta's established specifications and standards. The purpose of this framework is to ensure that these organizations meet specific rules and standards set by Meta when developing and deploying AI technologies. Such an approach helps Meta manage the scope and manner in which its AI technology is applied, thereby safeguarding its business interests and brand image. + +The Llama2 license may constitute a compliance requirement that must be adhered to for those who plan to conduct AI development on the Meta platform. 
This means that these organizations must follow Meta's specific specifications and requirements when using Meta-provided tools and resources to develop and deploy AI models. In doing so, these companies may need to apply to Meta for the appropriate licenses, of which the Llama2 license is a part. + +#### 3.2.4 Means of securing controllable licenses + +**Document the use of open source components** + +When the enterprise or individual user's software reaches a certain size, the burden of managing the included open-source components becomes heavier, which leads to infringement problems due to the inability to manage them in a timely manner. According to Synopsys, 89% of the codebase contains open-source code that has been out of date for at least four years, and 88% of the codebase contains components that have been inactive for the past 2 years and contain components that are not the latest version. In many cases, developers may have completely forgotten which open-source components have been used and are unable to react in a timely manner when licenses for those open-source components are updated, leading to infringement issues. Therefore, it becomes very necessary to manage open-source components in a reasonable way. + +Developers can manually or automatically maintain a detailed dependency list of all used open-source components and their version information in the project's documentation. For example, in many programming languages, dependencies can be tracked using files such as requirements.txt (Python), package.json (Node.js), and so on. + +Create an internal document or knowledge base that records all relevant information about the open-source components used, including their origin, license information, and how they are used, and regularly check their licenses for updates. Track in detail in the documentation which open-source components are used, and add comments in the corresponding places in the code to indicate this. 
Add the corresponding license website to the document to check it regularly and find out the changes of the license terms in time. Also document in your programming how you have complied with valid license conditions. + +For larger volumes of development work, manually recorded text may not be able to meet the project requirements, at this time you can use related tools, such as code component analysis (SCA) software. These tools automatically identify and document the open-source components used in a project. They are usually able to provide detailed reports that include component license information, versions, and possible security vulnerabilities. + +**Cautious use of supplementary coding tools** + +Intelligent programming assistants such as ChatGPT and GitHub Copilot provide programming advice and code snippets by analyzing a large number of codebases and documentation. While these tools are extremely valuable in improving programming efficiency, there are several key points to consider when using the code they generate to avoid potential open-source license infringement issues: + +- License Issues with Source Code:Assistive programming software may generate suggestions based on code in its training datasets. These training datasets may contain code from different open-source projects that may have various license requirements. Usually supplementary programming results do not index the corresponding licenses, and copyright issues may be involved if the generated code snippets are too close to the original code and are copied directly by the user. + +- Attribution of Responsibility:When using code generated by an intelligent programming assistant, it needs to be clear that the ultimate responsibility lies with the user. This means that the developer is responsible for the legality and suitability of the generated code. 
As a result, developers should conduct regular code reviews, especially for sections generated using assisted programming, to ensure that they do not violate the terms of any open-source license. + +**Adequate code audits during mergers and acquisitions** + +An adequate code audit during the M&A process is essential, especially to avoid infringement issues involving open-source licenses. M&A activities usually involve a thorough evaluation of the target company's assets, of which technology assets, especially software assets, occupy an important place. The following issues need to be highlighted in M&A audits: + +- Identifying Open-source Components: An important task of a code audit is to identify all open-source components used in the target company's products. This includes open-source libraries and frameworks that are used directly, as well as open-source software that is indirectly relied upon. Understanding these components and their versions is critical to assessing the associated license requirements. +- Reviewing License Compliance: After identifying an open-source component, its corresponding license needs to be reviewed. This includes determining the types, limitations and obligations of these licenses. In particular, note that some licenses may have specific restrictions on commercial use or require disclosure of modified source code. +- Assessing Risks and Responsibilities: During the audit, the legal and financial risks that may arise from non-compliance with open-source licenses should be assessed. This includes potential infringement lawsuits, fines, or the need to refactor parts of the product that rely on specific open-source components. +- Post-Integration Compliance Strategies: After an M&A is completed, there needs to be a clear plan for integrating the target company's codebase and ensuring continued compliance with all relevant open-source license requirements. 
This may involve implementing new code management and compliance monitoring processes throughout the organization. +- Professional Legal Advice:Because open-source licenses can be very complex, obtaining professional legal advice is critical. A professional attorney can help correctly interpret the terms of the license and provide advice on how to handle potential license conflicts. + +### 3.3 Open Source AI Security + +With the popularity of LLMs, in addition to the LLM license issues mentioned above, more AI safety and control issues have gradually entered people's view. Since the technology is relatively new and there is no clear definition and specification, this paragraph lists the topics of greater concern to the relevant practitioners at the moment based on desk research, in the hope of triggering readers' thinking, and welcomes discussion and feedback. + +#### 3.3.1 Open Source AI Poses New Requirements for Data Security + +Unlike traditional data security, since a large part of the output results of AI LLMs depends on the training dataset, issues such as the quality of the dataset and whether the dataset contains malicious data are particularly important for AI LLMs, especially open-source LLMs, because many of the datasets of the open-source LLMs provide data internally by the enterprise, and the cleansing, monitoring, and compliance can't be done as professionally as those of the professional closed-source LLM vendors. + +**Improper handling of the training dataset triggers a range of biases** + +Data bias occurs when certain elements in a data set are overemphasized or underrepresented. When training AI or machine learning models based on such biased data, it can lead to biased, unfair and inaccurate results. 
+ +- **Selective Bias**:Some facial recognition systems, trained primarily on white images, have relatively low accuracy in recognizing faces of different races; +- **Exclusionary Bias**:This bias usually occurs at the data preprocessing stage, and if the data is based on stereotypes or false assumptions, then the results will be biased regardless of which algorithm is used; +- **Observer Bias**:Researchers may consciously or unconsciously bring their personal views into a research project, which can influence the results; +- **Racial Bias**:Racial bias occurs when a dataset is biased toward a particular group; +- **Measurement Bias**:This bias occurs when the data used for training does not match the data in the real world, or when incorrect measurements distort the data. + +These biases, when used maliciously, can lead to outputs that are significantly politically or racially biased, or data errors that can significantly affect the performance and credibility of the larger model. + +**Training data sources should be taken into account when choosing a LLM of an open-source base** + +Many of the LLM training data sources are obtained directly from the Internet via crawler tools, where discriminatory, hateful and offensive speech and information is prevalent. In practice, people read, comment, like and spread negative messages far more than positive ones. As a result, human-generated information sources have long been in a more chaotic and unhealthy state. LLMs in this environment may contribute to the spread of racial discrimination and disinformation by being influenced by such data. + +Once the data source at the base of the LLM is contaminated, even if the enterprise itself is fine-tuned to use a perfect data source, it can lead to significant bias in the final output. Therefore, when choosing a LLM for the base, users should not only consider the performance of the LLM, but should also take the source of the training data into consideration. 
The focus should be on LLMs that select annotated datasets from multiple sources in a responsible manner, while considering bias minimization as a factor to focus on throughout the model building process and even after deployment. + +#### 3.3.2 The extensive use of open-source AI LLMs raises ethical considerations for society + +**The problem of LLM hallucinations can lead to serious consequences** + +There is an unresolved problem with current LLMs - hallucinations. According to the Sail Lab at HIT (Harbin Institute of Technology), hallucination refers to "text generation tasks in which unfaithful or meaningless text is sometimes produced." While hallucinatory texts are unfaithful and meaningless, they are often so readable due to the powerful context generation capabilities of the LLM that the reader is led to believe that they are based on the provided context, even though it is actually very difficult to find or verify that such a context actually exists. This phenomenon is similar to mental hallucinations that are difficult to distinguish from other "real" perceptions, and it is also difficult to capture hallucinatory texts at a glance. + +There are many types of hallucinations and they are still emerging as the use of LLMs expands. The main types of common hallucinations are the following: + +- **Logic Errors**: The LLM makes logical errors in its reasoning, which results in outputs that seem reasonable but don't stand up to scrutiny; +- **Fabricated Facts**: The database of the LLM itself does not support its answer to this question, but since the LLM cannot define its own boundaries, it will confidently assert facts that simply do not exist; +- **Data-Driven Bias**: As mentioned in the previous section, due to the prevalence of certain data, the output of the model may be biased in certain directions, leading to erroneous results. + +False outputs due to LLM hallucinations may cause harm to some users who are convinced by them. 
On May 16, 2023, the World Health Organization issued a statement of caution on the use of AI LLM tools. They noted that while these tools facilitate access to health information and may enhance the efficiency of diagnosis, particularly in resource-poor areas, their use requires a rigorous assessment of potential risks. The World Health Organization further emphasized that rushing into the use of inadequately tested systems could lead to mistakes by healthcare professionals, harm to patients and reduced trust in AI technologies, which could undermine or delay the potential long-term benefits and applications of such technologies globally. +
+ +
+ +| ![image037](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_3/3-9.png) | +| -------------------------------------------------------- | + +
+ +
Figure 3.9 Classification of hallucinations by Harbin Institute of Technology
+
+ +Since there is not yet a clear accountability entity for LLMs, and even more so for open-source LLMs, in the event of serious consequences, it will be very difficult for users who have suffered losses to defend their rights and have their losses mitigated. Currently there are 2 pressing issues to be addressed in this regard: + +- How LLM hallucinations can be better addressed - technical aspects +- How to define more clearly who is responsible for LLMs - legal aspects + +**LLMs may output content that violates laws and ethics** + +At present, some LLMs lack content filtering mechanisms, resulting in output content that violates domestic laws and regulations, public order and morals, mainly in the following situations: + +- Copyright Issues: LLMs may generate content that contains or resembles copyrighted material. For example, the model may create text that is similar to pre-existing literary works, song lyrics, movie scripts, and so on. Such output may violate the rights of the original author or copyright holder, leading to legal disputes; + +- Territorial Legislation: Different countries and regions have their own unique legal systems. For example, certain countries have stricter censorship of Internet content, such as explicit bans on politically sensitive content, religious messages or specific expressions on gender issues. When the LLM runs in these regions, the generated content must comply with local laws. For example, when someone asked an LLM "how to cook wild giant salamander", the model answered "braise it" and even provided detailed steps. Such answers may mislead the questioner. As a matter of fact, wild giant salamanders are Class II protected animals and should not be captured, killed or eaten. + +- Defamation and Misinformation: If model-generated content contains false accusations or defamatory statements about individuals or organizations, legal action may result. 
This places high demands on ensuring the accuracy and legitimacy of the content. + +In order to ensure compliance with various legal requirements, organizations using LLMs may need to put in place regulatory mechanisms, such as auditing generated content to ensure that it does not violate any legal requirements. Especially for open-source models used by enterprises, they are relatively more leniently scrutinized for content output, and enterprises need to pay extra attention to related issues to prevent getting into legal disputes and incurring losses. Here again, it can be summarized in 2 questions: + +- How to Enhance Information Filtering Mechanisms for LLMs - Technical Aspects +- How to define whether LLM output content is infringing and illegal - legal aspects + +**LLMs may exacerbate social divide** + +The Secretary General of the Digital Economy Committee of the Beijing Computer Society has said:The potential security issues of LLMs are of particular concern for those who lack critical thinking and analytical skills, and who are not well-informed about paid knowledge and healthcare services. With the dramatic increase in the number of Internet users and the widespread use of mobile devices, such as cell phones, low-education and low-income populations are increasingly relying on these avenues for medical, educational, and daily life advice. However, large-scale generative language models may exacerbate discriminatory portrayals and social biases against these marginalized groups, deepen social divisions, increase the harm of misleading, malicious information, and raise the risk of disclosure and misuse of individuals' real information. 
+ +The use of LLMs is like a double-edged sword; on the one hand, it can reintegrate network resources and improve the efficiency of information collection; on the other hand, it may exacerbate information barriers due to problems such as hallucinations and lead to the misinformation of many populations with scarce information sources. There are 2 issues that need to be addressed at this point: + +- Enhancing public education that LLMs are not a panacea and need to be viewed with caution - Social communication aspect +- How to ensure the quality of LLM training datasets and reduce their bias - technical aspect + +## 4 Capital market situation for open source projects + +### 4.1 The status of global markets + +#### 4.1.1 Global VC Investment Declines in 2023, but AIGC is in the Spotlight + +Since 2023, volatility in global financial markets has increased due to growing interest rates, challenging economic conditions, geopolitical conflicts, and concerns about the stability of the international financial system, which has led to a bleak picture for the global VC capital markets. According to KPMG, global venture capital activity has declined for seven consecutive quarters through Q3 2023 (see Figure 4.1).
+ +
+ +| ![image038](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-1.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.1 Global Venture Capital Activity (Source:KPMG)
+
+ +Against the backdrop of a declining equity market, fund managers have generally reduced their allocations to private equity assets to maintain portfolio proportions; at the same time, due to the high volatility of venture capital and the uncertainty of the future global economic situation, the scale of venture capital fundraising in 2023 has dropped significantly compared with previous years. Compared to an average of more than $250 billion annually over the past five years (2018-2022), venture capital commitments as of 2023Q3 amounted to just $116 billion (according to KPMG). Given the trend of seven consecutive quarters of declining venture capital activity, fundraising is expected to shrink significantly in 2023Q4 and for the full year.
+ +
+ +| ![image039](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-2.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.2 Global Venture Capital Fundraising Scale (Source:KPMG)
+
+ +At the valuation level, investor caution is also growing. Compared to 2021 and 2022, the proportion of premium financing has decreased by about 10%, and the proportion of par and discount financing has risen by about 5%, which creates an obstacle to the exit of early-stage capital.
+ +
+ +| ![image040](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-3.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.3 Global VC Premium, Par, and Discount Financing Ratios (Source:KPMG)
+
+ +However, against the backdrop of an overall bleak environment, AIGC-related financings have been in the global spotlight, with a significant increase in the size of related financings. In North America, AI-related companies accounted for the largest number of new unicorns in 2023, including AI agent startup Imbue, AI + biotech company TrueBinding, generative AI company Runway, and natural language processing company Cohere; in Europe, despite the overall slowdown in funding, AI companies have been particularly strong, with a large number of startups receiving funding, such as French AI platform company Poolside; in Asia, investor interest in AI continues to grow, but so too does regulatory oversight of generative AI by national regulators. + +It is expected that, with the rapid iteration of AI technology and the continued popularity of concepts such as LLMs and AI agents, investment and financing in the AI field will be less affected by the contraction in global venture capital investment. + +#### 4.1.2 Global Open Source Financing + +The growth of commercial open-source companies has been remarkable in recent years, with the combined market capitalization of these companies growing rapidly from $10 billion to surpass the $500 billion mark. This significant growth not only demonstrates the huge potential of open-source technology in the commercial sector, but also reflects the high level of investor recognition and trust in the open-source model. According to OSS Capital, the market capitalization of commercial open-source companies is expected to reach a staggering $3 trillion in the future. 
+ +The open-source business sector has shown solid growth over the past four years. Over 400 startups raised approximately 700 rounds totaling $29 billion during this period. Specifically, annual financing increased from $270 million in 2020 to $12.5 billion in 2023, a compound annual growth rate of 255%. + +Although the size of the financing showed a downward trend in 2022, this trend was mitigated in 2023. Beginning in February 2023, financing began to pick up gradually. In the first 11 months of 2023, total funding had already surpassed the amount raised in all of 2022. However, volatility in the scale of financing increased throughout the year, influenced by geopolitical conflicts and the post-epidemic economic recovery. Financing peaked at around $2 billion in March, May and September, and was below average in June and August. + +Even in the lowest funding month of 2023, $386 million in monthly funding exceeded the highest monthly funding in 2021 and even surpassed the total funding for all of 2020 ($272 million). This clear growth trend reflects the capital markets' continued and growing interest in, recognition of, and confidence in open-source business. Investors value not only the innovative potential and technological advantages of open-source models, but also their sustainability in the marketplace and long-term growth potential. +
+ +
+ +| ![image041](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-4.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.4 Amount of Global VC Funds Invested in Commercialized Open-source Software Companies (Source:OSS Capital)
+
+ +In terms of the financing scale of each round, capital favors mid-stage rounds such as Series B, C, and D. This reflects the characteristics of commercial open-source companies: in the early stage, the technical details are still unclear and the business model is not yet proven; however, once they move past the start-up stage, commercial open-source companies show much stronger growth momentum and attract more capital; in the later stage, when the business model has gradually matured and the open-source product has become well known and generates stable cash flow, the need for financing declines.
+ +
+ +| ![image042](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-5.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.5 Distribution of Financing Rounds for Commercialized Open Source Software Companies ($M) (Source:OSS Capital)
+
+ +A total of 328 commercial open-source companies have received more than $10 million in funding over the past four years. Of these, the main concentration was in the US$10-50 million range, with a total of 210 rounds, or 64% of all rounds, in the US$10-20 million and US$20-50 million ranges. There were 49 rounds of $50-100 million and 46 rounds of $100-200 million, accounting for 29% of all rounds. A total of 23 companies received more than $200 million in funding, with two of them even receiving more than $500 million in a single round.
+ +
+ +| ![image043](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-6.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.6 Distribution of Financing Rounds for Commercialized Open-source Software Companies ($M) (Source:OSS Capital)
+
+ +### 4.2 The status of China market + +#### 4.2.1 Overview of the development of China's equity capital market + +**The number and size of newly established funds declined, but the overall trend is gradually improving** + +In the first half of 2023, 3,930 new funds were launched in the (PE/VC) market, down 12% from 4,456 new funds launched in the same period last year. During this period, new fund launches totaled $364.2 billion, a decrease of 3% year-over-year. Despite the decline in size and volume compared to last year, the second quarter performed better than the first quarter, with an overall improving trend:Specifically, new fund launches in the first quarter amounted to $161.4 billion, a decline of nearly 20% year-on-year, while the second quarter recorded $202.8 billion, an increase of 16% year-on-year.
+ +
+ +| ![image044](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-7.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.7 Domestic Private Equity Fund Contributions and Volume (Source:investment.com, KPMG)
+
+ +**Increase in the size of RMB funds and a significant decrease in the size of foreign currency funds** + +In the first half of 2023, the number of new RMB funds launched was 3,840, a decrease of 13% compared to the same period last year. The total size of RMB funds reached US$339.5 billion, a 13% increase compared to the same period last year. The size of foreign currency funds was $24.7 billion, a significant decline of 67% from the previous year. Despite the increase in the number of foreign currency funds in 2023, their impact on the total size is small as most are small funds. + +This trend indicates that the domestic equity investment market prefers the more conservative investment style of RMB funds and requires a higher degree of stability in portfolio companies. For open-source business startups in China, simply following the market buzz is no longer enough to attract investment. Technological strength and long-term growth potential become key factors in assessing whether to make further investments.
+ +
+ +| ![image045](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-8.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.8 Size and number of domestic private equity RMB funds (Source:KPMG)
+
+ +
+ +| ![image046](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-9.png) | +| -------------------------------------------------------- | + +
+ +
Figure 4.9 Domestic Private Equity Foreign Currency Fund Size and Volume (Source:KPMG)
+
+ +**Economic recovery falls short of expectations, and overall investment volume and size decline** + +Against the macro backdrop of a fragile economic recovery, a slowdown in overall demand, and instability in external markets, the total number of investments in the equity market in the first half of 2023 was 3,750, a year-on-year decline of 31%; the total amount invested was USD 56.9 billion, a decline of 6% compared to the same period last year. Compared with the fundraising side, where the size of newly established funds declined by 3%, the investment side contracted more sharply, which further illustrates investors' cautious sentiment and is consistent with the trend shown by international markets.
+ +
+ +| ![image047](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-10.png) | +| --------------------------------------------------------- | + +
+ +
Figure 4.10 Amount and number of investments in the domestic equity market (Source:KPMG)
+
+ +#### 4.2.2 Steady development of domestic open source ecology + +**Open-source industry is gradually improving in all aspects of the ecosystem and is steadily growing** + +At present, the domestic open-source industry is experiencing the development pattern of both top-level design and industrial progress, talent reserve and technological innovation, making progress together in all aspects from laws and regulations, policy support, competition selection, and all links of the industry chain. + +In terms of laws and regulations, Zhang Guofeng, deputy director of the Institute of Artificial Intelligence and Change Management at the University of International Business and Economics in Shanghai and secretary general of the Shanghai Open-source Information Technology Association on November 2, 2023, said at the media communication meeting of the 2023 Open-source Industry Ecological Conference that Shanghai's open-source industry planning and policies are in the process of being drafted and pushed forward, and that Shanghai must seize the historic opportunity to actively participate in digital governance and digital public goods international cooperation (news from The Paper); in terms of policy support, at the 2023 Global Open-source Technology Summit (GOTC), the Shanghai open-source industry service platform was officially announced to start:Shanghai Pudong Software Park signed a contract with the Linux Foundation Asia-Pacific to officially land the Linux Foundation Asia-Pacific Open-source Community Service Center, and signed a strategic cooperation agreement with OSChina to build the Shanghai open-source ecological (News from Wen Hui Bao). 
In terms of competition selection, China has already had a series of open-source competitions such as "China Software Open-source Innovation Competition" and "OpenHarmony Competition Training Camp", which have attracted students from Shanghai Jiaotong University, Fudan University and other domestic universities to participate in the competitions, and a large number of innovative highlights have emerged from the competitions, fully reflecting the momentum and great potential of the flourishing co-construction of the open-source ecosystem. A large number of innovative highlights emerged in the competition, fully reflecting the good momentum and great potential of the open-source ecological construction. + +All segments of the open-source chain are thriving. In the field of artificial intelligence, numerous companies have open-sourced base LLMs, including Alibaba open-sourcing Tongyi Qianwen, High-Flyer Quant open-sourcing DeepSeek, and more. Startups in Baichuan Intelligence, Zhipu AI, Zero One Everything and so on have respectively released a variety of LLMs of their own training base, it is worth mentioning that these companies are favored by the capital market, respectively, in this year, one or more high-value financing. In the developer tools layer, a number of startups that are already deep in the game are joined by new players and there are already products that are trying to go global. In the foreseeable future, there are also opportunities for open-source AI applications to usher in more opportunities at the application layer. + +In the area of underlying operating systems, large companies are promoting the localization of operating systems, including the Anolis OS open-source community developed by Alibaba and the openEuler community supported by the OpenAtom Open-Source Foundation. 
These large enterprises also have notable open-source project layouts in a number of key areas, including cloud native, big data, artificial intelligence, and front-end technologies. For example, ant-design, Ant Group's enterprise UI design tool, PaddlePaddle, Baidu's deep learning platform, and Apache Echarts, a data visualization charting library, all have a wide reach and large user base in the GitHub community. + +In the big data and database industry, a number of startups are actively strategizing in response to the large and diverse data generated by domestic and international markets, as well as the growing demand for data processing. For example, PingCAP launched TiDB, a distributed relational database, and TiKV, a distributed key-value database; TDengine, a time-series database; and ShardingSphere, a distributed database middleware from SphereEx. With the development of AI technology, innovative products have emerged in the AI field, such as Zilliz's vector database developed for AI applications and Jina.ai's neural search engine, which enables searches across all types of content.
+ +
+ +| ![image048](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-11.png) | +| --------------------------------------------------------- | + +
+ +
Figure 4.11 Map of domestic AI-related tech companies' open source projects and open source companies (partial)
+
+ +**ModelScope has become the first portal for domestic open source LLMs, marking the gradual maturing of China's open-source AI community** + +ModelScope Community is an AI model community launched by Alibaba DAMO Academy in collaboration with the Open-source Development Committee of China Computer Federation (CCF), aiming to build a next-generation open-source model-as-a-service sharing platform and to lower the threshold of AI applications. Since its launch, it has expanded rapidly: the community now has over 2,300 models, over 2.8 million developers, and over 100 million model downloads. Baichuan Intelligence, Zhipu AI, Shanghai Artificial Intelligence Laboratory, IDEA Research Institute and other leading LLM organizations use ModelScope as their open-source model debut platform. + +The ModelScope community upholds the concept of "Model as a Service" and treats AI models as an important element of production, providing services around the model lifecycle, from model pre-training to secondary tuning and finally to model deployment. Compared to the foreign community Hugging Face, ModelScope pays more attention to domestic needs, provides a large number of Chinese models, and promotes the application of relevant AI scenarios in China.
+ +
+ +| ![image049](https://raw.githubusercontent.com//kaiyuanshe/2023-China-Open-Source-Report/main//public/image/commercialization/chapter_4/4-12.png) | +| --------------------------------------------------------- | + +
+ +
Figure 4.12 To date, the ModelScope community has 11 model categories, including LLM, zero-shot learning, etc.
+
+ +The establishment and rapid development of the ModelScope community has set a benchmark for China's open-source community culture, which is conducive to further promoting the spread of open-source culture in China, attracting more creative, open-source spirit of technology creators, technology users to join, and promoting the further prosperity of China's open-source cause. + +#### 4.2.3 Domestic Open Source Company Financing Remains Hot + +The market heat maintained in 2023, with several large investments taking place and some startups raising multiple rounds of funding in a year, reflecting the high level of investor interest. Open Source China is an open-source community platform company, including nearly 100,000 world-renowned open-source projects, under the banner of open-source community Landscape and Japan's old open-source community OSDN, and also owns the code hosting platform Gitee, which is the leading code hosting service platform in China, and has obtained a 775 million yuan of strategic financing in the B+ round; SelectDB develops and promotes open-source real-time data warehouse Apache Doris, and provides technical support and commercial services for Apache Doris users, and has obtained a new round of several hundred million yuan of financing so far. Flywheel Technology, which develops and promotes the open-source real-time data warehouse Apache Doris and provides technical support and commercial services for Apache Doris users, has obtained a new round of financing of hundreds of millions of yuan, and the total financing scale has reached nearly 1 billion yuan up to now; Lanboat Technology, which provides a new generation of cognitive intelligence platform based on NLP technology, has completed the investment of the Pre-A+ round, and the total financing scale has reached hundreds of millions of yuan in less than a year. 
+ +At present, China's open-source ecosystem is still at an early stage of development. Financing events in 2023 were concentrated mainly in Series B and earlier rounds and spanned artificial intelligence, open-source communities, data warehouses, LLM platforms, and other fields, indicating vast market opportunities. + +
+Table 4.1 Investment and Financing of Domestic Open Source Software Startups (slide to right to view full content) +
+
+(Github statistics as of December 7, 2023) +
+ +| **Company** | **Open source project** | **Corporate operations** | **Latest round of financing round** | \*\* Amount of latest round of financing\*\* | **Time of latest round of financing** | **GitHub Star** | **GitHub Fork** | +| --------------------------------------------- | ------------------------------------ | -------------------------------------------------- | ---------------------------------------- | ----------------------------------------------------- | ------------------------------------- | --------------- | --------------- | +| **Tributary Technologies** | Apache APISIX | Microservices API Gateway | A + round | Millions of dollars. | June 2021 | 10.8k | 2k | +| **Moby Dick Open Source** | Apache DolphinScheduler | Cloud-Native DataOps Platform | Pre-A round | tens of millions of dollars | July 2022 | 9.4k | 3.5k | +| **Flywheel Technologies** | Apache Doris | Cloud Native Real-Time Warehouse | Pre-A round | several hundred million dollars | June 2023 | 6.5k | 1.9k | +| **Even Tech** | Apache HAWQ | Hadoop SQL Analysis Engine | B + round | Nearly $200 million | August 2021 | 672 | 324 | +| **Tianmou Technology** | Apache IoTDB | Time Series Database System | angel round (finance) | nearly a billion dollars | June 2022 | 2.8k | 750 | +| **Short step information technology** | Apache Kylin | Big Data online analytical processing engine | D round | $70 million. 
| April 2021 | 3.4k | 1.5k | +| **StreamNative** | Apache Pulsar | distributed message queue | A + round | - | 2023 | 12k | 3.2k | +| **SphereEx** | Apache ShardingSphere | Distributed Database Pluggable Ecology | Pre-A round | Nearly $10 million | January 2022 | 17.7k | 6.1k | +| **Antoine Mound (AutoMQ)** | automq-for-rocketmq automq-for-kafka | Streaming storage software and message queues | Angel rounds + | Tens of millions of RMB | November 2023 | 195 | 34 | +| **Smart Spectrum AI** | ChatGLM | Large Language Model | B++++ | RMB 1.2 billion | September 2023 | 36.3k | 4.9k | +| **Luchen Technology** | Colossal-AI | High-Performance Enterprise AI Solutions | angel round (finance) | $6 million | September 2022 | 6.8k | 637 | +| **Chatopera** | cskefu | Multi-Channel Intelligent Customer Service System | angel round (finance) | millions of dollars | August 2018 | 2.2k | 742 | +| **Digital Change Technology** | Databend | cloud warehouse (computing) | angel round (finance) | Millions of dollars | August 2021 | 4.8k | 500 | +| **Dify.AI** | Dify | LLMOps platform | fund | undisclosed | March 2023 | 11.8k | 1596 | +| **Image Cloud Technology** | EMQX | MQTT Message Middleware | B round | 150 million | December 2020 | 10.8k | 1.9k | +| **TensorChord** | Envd | MLOps | seed round | Millions of dollars. 
| November 2022 | 1.3k | 102 | +| **Stoneware Technology** | FydeOS | Chromium-based operating systems | Pre-A round | tens of millions of dollars | February 2022 | 1.5k | 192 | +| **Generalized intelligence** | GAAS | Autonomous UAV flight program | * | undisclosed | October 2018 | 1.7k | 411 | +| **GeekCode** | Geekcode.cloud | cloud development environment | seed round | Millions of RMB | April 2022 | 42 | 2 | +| **Gitee** | git | Git Code Hosting | B + round | 775 million | July 2023 | - | * | +| **Polar Fox** | GitLab | DevOps Tooling Platform | A++ round | tens of millions of dollars | September 2022 | - | * | +| **White Sea Technology** | IDP | AI Data Development Platform | seed round | tens of millions of dollars | December 2021 | 17 | 3 | +| **Ella Yunko** | illa-builder | Low-code development platform | angel round (finance) | Millions of dollars. | September 2022 | 2.3k | 126 | +| **Gina Technology** | Jina | A multimodal neural network search framework | Series A | $30 million | November 2021 | 16.8k | 2k | +| **Juicedata** | JuiceFS | distributed file system (DFS) | angel round (finance) | millions of dollars | October 2018 | 7.1k | 605 | +| **Harmonic Cloud Technology** | Kingdling | Container Cloud Products and Solutions | B + round | over one hundred million dollars | January 2022 | 270 | 56 | +| **Fly to Cloud** | JumpServer | Cloud & DevOps | D + Wheel | 100 million | April 2022 | 19.5k | 4.8k | +| **Talent Cloud Technology** | Kubernetes | Container Cloud Platform | Mergers and Acquisitions - Bytes | undisclosed | July 2020 | 94.1k | 34.5k | +| **Zeto Technology** | Kunlun | distributed database | angel round (finance) | tens of millions of dollars | August 2021 | 112 | 15 | +| **Deepness Technology** | LinuxDeepin | Linux operating system | B round | tens of millions of dollars | April 2015 | 413 | 70 | +| **Matrix origin** | Matrixone | data intelligence | angel + round | Tens of millions of dollars | October 2021 | 1.3k | 212 | +| **Mission 
Technologies** | Mengzi | macrolanguage model | Pre-A+ round | several hundred million yuan (RMB) | March 2023 | 530 | 61 | +| **Zilliz** | milvus | vector search engine | B + round | $60 million. | August 2022 | 14.4k | 1.9k | +| **Euronet** | Nebula | distributed graph database | Pre-A + round | Nearly $10 million | November 2020 | 8.3k | 926 | +| **PLEASURE NUMBER TECHNOLOGY** | NebulaGraph | distributed graph database | Series A | Tens of millions of dollars | September 2022 | 9.7k | 1.1k | +| **First class technology** | oneflow | Deep Learning Framework | Mergers and Acquisitions - Meituan | - | 2023 | 4.1k | 478 | +| **Facial Intelligence** | OpenBMB | Large model applications | seed round | undisclosed | August 2021 | 359 | 49 | +| **EasyJet Travel Cloud** | OpenStack | IaaS | Round E | undisclosed | July 2021 | 4.6k | 1.6k | +| **Original Language Technology** | PrimiHub | privacy calculations | Angel rounds + | multi-million dollar | October 2022 | 263 | 60 | +| **Good Rain Technology** | Rainbond | Cloud Operating System for Enterprise Applications | Pre-A round | millions of dollars | August 2016 | 3.6k | 664 | +| **Quick use of cloud computing** | QuickTable | Code-free data modeling tools | * | undisclosed | August 2021 | 7 | 3 | +| **Rayside Technology** | RT-Thread | Internet of Things Operating System | - | undisclosed | January 2020 | 7.6k | 4.2k | +| **Giant Sequoia Database** | SequoiaDB | Distributed relational database | D round | several hundred million dollars | October 2020 | 305 | 115 | +| **Borderless Technology** | Shifu | IoT Software Development Framework | Series A | undisclosed | June 2022 | 205 | 21 | +| **Dingshi Vertical** | StarRocks | MPP Analytical Database | B round | undisclosed | January 2022 | 3.6k | 793 | +| **Stone Atomic Technology** | StoneDB | Real-time HTAP database | angel round (finance) | tens of millions of dollars | February 2022 | 639 | 100 | +| **TabbyML** | TabbyML | Open Source AI Programming Assistant | seed 
round | undisclosed | July 2023 | 13.9k | 515 | +| **Taiji graphic** | Taichi | Digital content creation infrastructure | Series A | $50 million | February 2022 | 21.7k | 2.1k | +| **Titanium-platinum data** | Tapdata | Real-time data service platform | Pre-A + round | Tens of millions of dollars | July 2021 | 223 | 52 | +| **Throughout data** | TDengine | Time-Series Spatial Big Data Engine | B round | $47 million | May 2021 | 20.1k | 4.6k | +| **PingCAP** | TiDB | distributed database | Round E | undisclosed | July 2021 | 32.9k | 5.3k | +| **Digital Paradise** | uni-app | A Unified Front-End Framework with Vue Syntax | B + round | undisclosed | September 2018 | 37.4k | 3.4k | +| **LINGO TECHNOLOGY** | Vanus | Large Model Middleware | seed round | Millions of dollars | July 2023 | 2.2k | 110 | +| **Future speed** | Xorbits | Distributed Data Science Computing Framework | angel round (finance) | Millions of dollars | February 2023 | 933 | 58 | +| **Levi Software** | Zabbix | IT operations management | Series A | undisclosed | November 2022 | 2.6k | 766 | +| **KodeRover** | Zadig | Cloud Native Software Delivery Cloud | Pre-A round | tens of millions of dollars | August 2021 | 1.8k | 636 | +| **EasySoft Tianchuang** | zentaopms | Agile Project Management | Series A | tens of millions of dollars | October 2021 | 946 | 275 | +| **Cloud Axis Information** | ZStack | IaaS | * | undisclosed | March 2021 | 1.2k | 380 | + +
+ +
+Table 4.2 Investment and Financing of Domestic Open-source LLM Startups (scroll right to view full content) +
+
+(Hugging Face statistics as of December 7, 2023) +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Company Latest financing round Date of last financing Recent financing volume Model Introduction model name likes download
百川智能 A 轮 2023-10-17 00:00:003 亿美元 在知识问答、文本创作领域表现突出 Baichuan-7B795102k
Baichuan-13B-Chat6128.29k
Baichuan2-13B-Chat321133k
智谱 AIB+++++ 轮 2023-09-19 00:00:0012 亿人民币 多模态理解、工具调用、代码解释、逻辑推理 ChatGLM-6B2.67k56.8k
ChatGLM2-6B1.91k97.7k
ChatGLM3-6B501104k
元语智能 出资设立 2022-11-24 00:00:00 功能型对话大模型 ChatYuan-large-v2171669
ChatYuan-large-v1108120
ChatYuan-7B93
面壁智能 天使轮 2023-04-14 00:00:00 数千万人民币 大语言模型,包括包括文字填空、文本生成、问答 cpm-bee-10b15819
cpm-ant-10b2212.6k
cpm-bee-1b127
澜舟科技 Pre-A + 轮 2023-03-14 00:00:00 数亿人民币 处理多语言、多模态数据,文本理解、文本生成 mengzi-t5-base411.42k
mengzi-bert-base321.46k
mengzi-t5-base-mt1744
虎博科技 A 轮 2019-03-01 00:00:003300 万美元 多语言任务大模型,覆盖生成、开放问答、编程、画图、翻译、头脑风暴等 15 大类能力 tigerbot-70b-chat-v2401.68k
tigerbot-180b-research3312
tigerbot-70b-base-v1153.25k
深势科技 C 轮 2023-08-18 00:00:00 超 7 亿人民币 高精度蛋白质结构预测模型 Uni-Fold-Data6
三维分子预训练模型 Uni-Mol-Data3
元象 XVERSEA + 轮 2022-03-11 00:00:001.2 亿美元 大语言模型,具备认知、规划、推理和记忆能力 XVERSE-13B11742
XVERSE-13B-Chat42412
XVERSE-65B356.18k
零一万物 天使轮 2023-11-06 00:00:00 通用型 LLM,其次是图像、语音、视频等多模态能力。Yi-34B1.07k109k
Yi-6B30326.7k
Yi-34B-200K1074.55k
+ diff --git a/en/data.md b/en/data.md new file mode 100644 index 0000000..53f5ac0 --- /dev/null +++ b/en/data.md @@ -0,0 +1,1494 @@ +--- +outline: deep +--- +# OSS Data Analytics + +## Overview + +The China Open Source Annual Report is based on in-depth and comprehensive data insights and is divided into eight major parts. The 1st part, **General Overall Macro Insights**, provides an overview of China's global open-source ecology through an in-depth analysis of essential events, active repositories, active users, open-source licensing, and programming languages. The 2nd part, **OpenRank Rank List**, is the list of open source projects, enterprises, foundations, developers, and collaborative robots in all areas of the world and China, and provides a comprehensive and systematic OpenRank indicator information service for industry. Part 3 and Part 4 contain **Enterprise Insights** and **Foundation Insights**, which illustrate the evolution of global and Chinese enterprises and foundations in the open source area through evolution maps and trend analyses. Part 5 **Technology Sector Insights** provides an in-depth study on the evolution of the Top 10 lists and projects in each area, showing the direction and trends in forward technology. Part 6 **Open Source Project Insights** provides insights into the diversity and innovative directions of different project types, areas, and topics. Part 7 **Open Source Developer Insights** An analysis of developer types, hours of work, geographical distribution, and robotic use shows the diversity and characteristics of the developer community. Part 8, **Case Studies**, provides a series of interesting case analyses that allow readers to glimpse China's exponential ecological boom. Overall, the data page offers a panorama of China's open-source ecology in 2023 through rich data insights and analyses. 
+ +### Introduction to indicators + +**OpenRank** + +OpenRank is a collaboration-network indicator developed by X-lab Open Laboratory. It is computed on the network of collaborative relationships between open-source developers and projects, so it not only characterizes the overall level of community participation in a project but also takes the surrounding open-source ecosystem into account, which makes it well suited to identifying and presenting the influence of entities such as projects, people, and organizations within that ecosystem. OpenRank is now widely accepted by industry and academia, including the China Institute for Standardization (ISI) series of Open Source Governance Standards, the ICT White Paper on Open Source Governance, the Open Atomic Open Source Foundation Global Open Source Screen, and the Business Open Source Office Governance Toolkit. + +For a definition of this indicator, refer to: + +[1] [Shengyu Zhao et al.: OpenRank Leaderboard: Motivating Open Source Collaborations Through Social Network Evaluation in Alibaba. ICSE, 2024](https://www.researchgate.net/publication/3766686121_OpenRank_Leaderboard_Motivating_Open_Source_Collections_Through_Social_Network_Evaluation_in_Alibaba) + +[2] [Zhao Honghou: How to evaluate an open source project (iii) value stream, 2021](https://blog.frankzhao.cn/how_to_measure_open_source_3) + +[3] Institute for Standardization of the Ministry of Industry and Information: Information Technology Open Source Governance Part 3: Community Governance and Operationalisation [T/CESA 1270.3-2023]; Information Technology Open Source Governance Part 5: Evaluation Model for Open Source Contributors [T/CESA 1270.5-2023], 2023 + +**Activity** + +Activity is a statistical indicator, proposed by X-lab, of how active a project or developer is. A developer's activity is a weighted combination of the developer's behaviors, such as Issues, PRs, and Code Reviews. A project's activity is computed from the sum of the activity of all developers in the project. 
+ +For a definition of this indicator, refer to: + +[1] [Xiaoya Xia et al.: Exploring Activity and Contributors on GitHub: Who, What, When, and Where. APSEC, 2023](https://ieeexplore.ieee.org/abstract/document/10043221) + +[2] [Frank Zhao: How to evaluate an open source project (i) - activity, 2021](https://blog.frankzhao.cn/how_to_measure_open_source_1) + + +## 1. Overall Macro Insight + +### 1.1 Basic Events + +**Basic events** are the data foundation for the analysis on this page. They are the event logs generated by developer activity on global open-source collaboration platforms such as GitHub and Gitee. A statistical analysis of these underlying events provides a macro insight into the dynamics of the global open-source ecosystem. This annual open-source report covers the collaborative platforms GitHub, Gitee, and GitLink. + +#### 1.1.1 Trends in events across GitHub + +First, the total number of event logs across GitHub is shown in the graph below. + +![1-1](/image/data/chapter_1/1-1.png) + +
Figure 1.1 Trends in GitHub annual events
+
+ +The overall activity of global open source and the number of active repositories have increased significantly in recent years, reflecting the growth of global open-source development. In 2023, GitHub log data reached 1.4 billion events, an increase of about 10.32 percent over 2022. After high growth in 2018-2020, the GitHub platform's annual event growth gradually declined, with a growth rate of about 10% in 2023. However, given the overall volume, even a 10 percent growth rate continues to highlight open-source technology's dynamic and critical role in the global digital transition. + +#### 1.1.2 Comparison of overall events trends in GitHub and Gitee + +Because of the sheer volume of events on the GitHub platform, the subsequent analysis is based on the top 30,000 active repositories on each platform. For ease of comparison, we selected for statistical analysis the eight categories of events on GitHub and Gitee that are most relevant to open-source participation: CommitCommentEvent, ForkEvent, IssueCommentEvent, IssuesEvent, PullRequestEvent, PullRequestReviewCommentEvent, PushEvent, and WatchEvent. + +![1-2](/image/data/chapter_1/1-2.png) + +
Figure 1.2 GitHub and Gitee Active Repository Events
+
+ +The Gitee platform showed a more pronounced growth trend. Since 2021, the number of events in its top 30,000 active repositories has even surpassed that of GitHub, highlighting the surge of active open-source projects in the country. Domestic developers' active participation and contribution to open-source communities have injected new dynamism into technological innovation and knowledge sharing. + +However, it must be emphasized that data on the top 30,000 active projects alone does not fully reveal the reality of the global GitHub platform, as the long-tail effect is still evident globally. Subsequent analyses will reflect this more clearly, especially in the broad and diverse nature of the GitHub platform as the world's leading open-source community. In the future, with the evolution of technology and the promotion of an open-source culture, the Chinese open-source community can be expected to continue to flourish globally. + +The underlying events are further broken down by type, and the results are shown in the figure below. + +![1-3](/image/data/chapter_1/1-3.png) + +
Figure 1.3 GitHub vs. Gitee Active Repository Event Types
+
+ +As can be seen from the analysis results: + +The most frequent event type on the GitHub platform is the Push event, while Pull Request events and Issue Comment events rank 2nd and 3rd, respectively. The occurrence rates of each event type have remained relatively stable, reflecting a trend towards a stable ecosystem in GitHub's open-source community. +On the Gitee platform, event data grew significantly in 2020, initially focusing on Watch events. But after 2020, Pull Request and Review Events grew rapidly, becoming the largest event type in 2022 and growing steadily in 2023. The structural changes in Gitee event data reflect a significant shift in the role of domestic developers from observers to contributors, which is consistent with observations worldwide. + +#### 1.1.3 GitLink Events Analysis + +For the GitLink platform, we have also selected the top 30,000 active repositories as benchmarks. Given the limitations of the data, only data covering the six types of events—CommitCommentEvent, ForkEvent, IssueCommentEvent, IssuesEvent, PullRequestEvent, and WatchEvent—were selected for analysis. + +![1-17](/image/data/chapter_1/1-17.png) + +
Figure 1.4 Data analysis of events on the GitLink platform
+
+ +While the number of active repository events on GitLink still lags behind platforms like GitHub and Gitee, it exhibits a notable upward trend. On the GitLink platform, Issues events and CommitComment events constitute the vast majority of active repository events. + +### 1.2 Active Repository + +#### 1.2.1 Trends in the total number of active repositories on GitHub + +The following figure shows a statistical analysis of the trend in the total number of active repositories on GitHub. + +![1-4](/image/data/chapter_1/1-4.png) + +
Figure 1.5 Trends in the number of GitHub annual active repositories
+
+ + +According to overall data for 2023, the total number of active repositories worldwide reached 87.92 million, marking a 4.06% increase from the previous year; this aligns with the overall trend in events, whose growth rate has been declining annually since the high growth of 2018 to 2020. This slowdown could stem from the COVID-19 pandemic and global economic developments. + +Because of the gap between the number of repositories on GitHub and on Gitee, the following analysis is likewise based on the top 30,000 active repositories on each platform. + +#### 1.2.2 Comparison of the overall activity of GitHub and Gitee + +The graph below shows a statistical analysis of the overall activity of GitHub and Gitee repositories. + +![1-5](/image/data/chapter_1/1-5.png) + +
Figure 1.6 GitHub vs. Gitee active repository activity
+
+ +Looking at the activity data of the top 30,000 active repositories from each platform, the overall activity on the Gitee platform grew rapidly from 2019 onwards. By 2022, it surpassed GitHub and maintained this high-growth trend, revealing the enormous vitality of open-source development in China during this period. + +![1-6](/image/data/chapter_1/1-6.png) + +
Figure 1.7 GitHub compared to Gitee active repository activity
+
+ +Furthermore, the detailed analysis of the composition of the activity reveals the following: + + +On the GitHub platform, the activity stemming from "Create PR" events comprises nearly half of the total activity, while "Merge PR" events contribute to approximately one-fourth. Reviewing PRs contributes around 10% of the activity, while the combined activity from issue creation and comments nearly matches, accounting for 7%. + +On the Gitee platform, the highest activity contribution comes from reviewing PRs, constituting two-thirds of the total activity. Similarly to GitHub, "Merge PR" events follow closely behind in activity contribution, with a proportion comparable to that on the GitHub platform. A surprising finding is that while "Create PR" events contribute the highest proportion of activity on GitHub, they contribute the least on the Gitee platform, accounting for only 2% of the total activity events. + +#### 1.2.3 Comparison of overall active repository OpenRank trends on GitHub and Gitee + +The graph below shows a statistical analysis of the OpenRank trends of active repositories on GitHub and Gitee. + +![1-7](/image/data/chapter_1/1-7.png) + +
Figure 1.8 GitHub vs. Gitee Active Repository OpenRank
+
+ +Although the activity of the top 30,000 repositories on Gitee briefly surpassed that of GitHub in 2022, the influence gap measured by OpenRank remains significant (approximately 5:2). Not only is the gap considerable but there also seems to be no indication of it narrowing in terms of trends. This is particularly noteworthy and underscores a key area of focus for future open-source development in China. + + +### 1.3 Active users + +#### 1.3.1 Trends in the total number of active users on GitHub + +The following figure presents a statistical analysis of the overall active user count on GitHub. + +![1-8](/image/data/chapter_1/1-8.png) + +
Figure 1.9 Trends in GitHub annual active users
+
+ +In 2023, the total number of active developers on GitHub reached 21.93 million, an increase of 8.88 percent over the previous year. As with active GitHub repositories, the growth rate began to decline in 2020 after nearly five years of high growth. Although GitHub officially announced at the beginning of 2023 that its platform had surpassed 100 million users overall, growth in active users on the platform has begun to slow, which is likely related to changes in the global situation and the rise of platforms such as Gitee. + +#### 1.3.2 Active user geographical distribution and ranking + +Thanks to a contribution from an award-winning entry in the OpenDigger Open Source Software Ecosystem Data Analysis and Mining Platform competition ([OpenSODA](https://github.com/ECNU/OpenSODA)), this annual report is able to include a detailed geolocation analysis of GitHub developers. + +The following analysis is based on approximately 2 million developers who have correctly filled in their geographical location information out of the 10 million active developers on GitHub in 2023. Considering the total registered users on GitHub to be 100 million, the sampling ratio is approximately 2%. + +**1. Geographical distribution of global developers** + +First, we analyze the geographical distribution of developers worldwide, as shown in the following chart. + +![1-9](/image/data/chapter_1/1-9.png) + +
Figure 1.10 Global geographical distribution of developers
+
+ +
Table 1.1 Global Developer Distribution by Country/Region (Top 15)
+
+ +| Ranking | States | Total Number | Percentage | Annual Activity | Active rate | +| :-----: | :------------: | :----------: | :--------: | :-------------: | :---------: | +| 1 | United States | 408983 | 21.09% | 236899 | 57.92% | +| 2 | India | 177669 | 9.16% | 107066 | 60.26% | +| 3 | China | 171039 | 8.82% | 126238 | 73.81% | +| 4 | Brazil | 114855 | 5.92% | 83932 | 73.08% | +| 5 | Germany | 88767 | 4.58% | 64836 | 73.04% | +| 6 | United Kingdom | 83245 | 4.29% | 55175 | 66.28% | +| 7 | Canada | 65241 | 3.36% | 42238 | 64.74% | +| 8 | France | 57480 | 2.96% | 40341 | 70.18% | +| 9 | Russia | 47213 | 2.43% | 31534 | 66.79% | +| 10 | Australia | 31638 | 1.63% | 20512 | 64.83% | +| 11 | Poland | 31469 | 1.62% | 21792 | 69.25% | +| 12 | Japan | 30873 | 1.59% | 21942 | 71.07% | +| 13 | Netherlands | 30617 | 1.58% | 21685 | 70.83% | +| 14 | Spain | 28928 | 1.49% | 19509 | 67.44% | +| 15 | South Korea | 28325 | 1.46% | 21811 | 77.00% | + +Overall, developers from various countries are continuously increasing: + +- The United States ranks first due to its early involvement in the open-source domain and its advantage in technology talent. +- Based on the calculated total number of developers from the United States in the table (409,000), the actual number of developers from the United States on GitHub is estimated to be around 21.01 million, with a deviation of approximately 4% from the official data released by GitHub (22 million). +- India, China, and Brazil, with their large population bases, rank second, third, and fourth in terms of the number of developers. However, based on the activity rate (annual active users/total users), China has the highest rate among the top four. +- Developers from European countries also constitute a significant force in the open-source community, collectively ranking second in volume. 
+- According to the official data released by GitHub and Gitee (both around 12 million), the total number of global open-source developers from China is likely to exceed 20 million, roughly equivalent to the number from the United States in quantity alone. + +**2. Geographical distribution of Chinese developers** + +Further analysis shows the geographical distribution of Chinese developers, as shown in the graph below. The data come from nearly 150,000 users located in “China” who correctly filled in their provincial information. + +![1-10](/image/data/chapter_1/1-10.png) + +
Figure 1.11 Geographical distribution of Chinese developers
+
+ +According to GitHub data for Q3 2023, the total number of Chinese developers is approximately 11.9 million; the actual number of developers in each province can be estimated from this total in proportion to the sample. + +
Table 1.2 Distribution of Chinese Developers (Top 15)
+ +
+ +| Ranking | Province/Region | Total Number | National percentage | Estimated Actual Total (×10,000) | +| :-----: | :-------: | :----------: | :-----------------: | :-------------: | +| 1 | Beijing | 32982 | 22.04% | 262.25 | +| 2 | Shanghai | 24581 | 16.43% | 195.45 | +| 3 | Guangdong | 21684 | 14.49% | 172.41 | +| 4 | Zhejiang | 14256 | 9.53% | 113.35 | +| 5 | Taiwan | 12173 | 8.13% | 96.79 | +| 6 | Jiangsu | 7335 | 4.90% | 58.32 | +| 7 | Sichuan | 7012 | 4.69% | 55.75 | +| 8 | Hong Kong | 4678 | 3.13% | 37.19 | +| 9 | Hubei | 4415 | 2.95% | 35.1 | +| 10 | Shaanxi | 2815 | 1.88% | 22.38 | +| 11 | Fujian | 2405 | 1.61% | 19.12 | +| 12 | Shandong | 2035 | 1.36% | 16.18 | +| 13 | Hunan | 1858 | 1.24% | 14.77 | +| 14 | Chongqing | 1833 | 1.22% | 14.57 | +| 15 | Anhui | 1487 | 0.99% | 11.82 | + +The ranking and data in the above table reveal the correlation between the number of Chinese open-source developers and the level of regional economic development: + +- The number of open-source developers in each of the four leading regions (Beijing, Shanghai, Guangdong, and Zhejiang) has passed the one-million level, with Beijing in the lead; +- Taiwan and Hong Kong rank fifth and eighth respectively, highlighting the importance of Hong Kong and Taiwan; +- The number of open-source developers in the Yangtze River Delta (Jiangsu-Zhejiang-Shanghai-Anhui) region has reached almost 3.88 million; +- The central and western regions, such as Sichuan, Hubei and Shaanxi, have also performed well, particularly Sichuan, which has attracted a large number of developers through its favorable, fast-growing software industry. + +### 1.4 Open source licenses + +#### 1.4.1 Number of repositories using open-source licenses + +The graph below shows the number of active GitHub repositories using each open-source license. + +
+1-11 +
+ +
Figure 1.12 Number of warehouses using open source licenses
+
+ +The analysis revealed that the most widely used open-source licenses at present include the MIT License, the Apache License v2.0, the GNU General Public License v3.0, and the BSD 3-Clause License. Of these, the MIT License ranks first, with a share of 60%. The MIT License is named after the Massachusetts Institute of Technology. Its simplicity and flexibility have made it the license of choice for many developers, and it imposes the fewest legal restrictions, encouraging developers to use and disseminate software freely. + +#### 1.4.2 Trends in Open-Source Licensing Types + +Statistical analysis has been conducted on the trends of open-source license types, as shown in the following figures. + +![1-12](/image/data/chapter_1/1-12.png) + +
Figure 1.13 Trends in the Number of Open Source License Types
+
+ +Overall, the number of open-source license types has continuously increased since 2017. Introducing licenses such as the Eclipse Public License 2.0, the European Union Public License 1.2, and others contributed to the growth observed between 2017 and 2018. Subsequently, the growth rate of open-source license types slowed down. Between 2021 and 2022, a new batch of open-source licenses, such as the Mulan Series Licenses and the CERN License v2, began to emerge. Following this, the development trend stabilized, and currently, the mainstream license types on GitHub have remained steady at 46 types for two years. + +#### 1.4.3 Trends in the Number of Repositories Using Open Source Licenses + +According to GitHub's log data, in 2023, nearly 7.7 million active repositories used various open-source licenses, accounting for 8.76% of all active repositories. We present the MIT License's data separately due to its significant influence. + +**1. Trends in the Number of Repositories Using the MIT License** + +Statistical analysis of the trends in the number of repositories using the MIT License is shown in the following figure. + +![1-13](/image/data/chapter_1/1-13.png) + +
Figure 1.14: Trends in the Number of Repositories Using the MIT License
+
+ +Observations: + +- The MIT License is currently the most popular open-source license, with 1.58 million active repositories in 2023. +- The trends in the number of repositories using the MIT License are similar to those of the total repository count, with significant growth observed. However, the growth rate slowed down in 2022 and 2023, which correlates with the overall slowdown in project growth. + +**2. Trends in the Number of Repositories Using Other Top Five Open Source Licenses** + +The following figure shows a statistical analysis of the trends in the number of repositories using other top-five open-source licenses. + +![1-14](/image/data/chapter_1/1-14.png) + +
Figure 1.15: Trends in the Number of Repositories Using Other Licenses
+
+ +Observations: + +- The number of open-source licenses is growing, with MIT, Apache, and GNU licenses remaining the top choices. +- Differences between niche and popular open-source licenses still exist. +- Since 2022, the usage of GNU General Public License (GPL) versions 2 and 3 has been declining overall, while GNU Affero General Public License version 3 has been increasing yearly. + +#### 1.4.4 Trends in the Number of Repositories Using the Mulan Series Licenses + +The following figure shows a statistical analysis of the trends in the number of repositories using the Mulan Series Licenses. + +![1-15](/image/data/chapter_1/1-15.png) + +
Figure 1.16 Accumulative Trends in the Number of Repositories Using the Mulan Series Licenses
+
+ +The Mulan Series Licenses (including the Mulan Permissive Software License and the Mulan Public License, among others) are drafted, revised, and released by Peking University, with the support of the National Standardization Technical Committee on Cloud Computing and the China Open Source Cloud Alliance. As the first open-source software agreement recognized by the Open Source Initiative (OSI) in China, the Mulan Permissive Software License (Mulan PSL) holds significant influence. + +Observations indicate a growth in repositories utilizing the Mulan licenses starting September 2022. By December 2023, there were 220 such active repositories, showcasing the increasing influence of Mulan open-source licenses. + +### 1.5 Programming Languages + +#### 1.5.1 Top Programming Languages Used by Developers in 2023 + +The popularity of programming languages is of great interest to developers. The analysis below presents the most popular programming languages among developers in 2023, as shown in the following table. + +
Table 1.3: Top 20 Programming Languages Used by Developers
+
+ +| Rank | Programming Language | Number of Developers Using | Number of Repositories Using | +|:-------:|:-----------------------:|:-------------------------------:|:--------------------------------:| +| 1 | JavaScript | 765,589 | 1,806,477 | +| 2 | Python | 629,423 | 653,025 | +| 3 | HTML | 564,121 | 676,364 | +| 4 | TypeScript | 462,729 | 886,453 | +| 5 | Java | 368,795 | 463,660 | +| 6 | CSS | 190,480 | 239,187 | +| 7 | C++ | 177,905 | 135,330 | +| 8 | C# | 158,159 | 180,537 | +| 9 | Go | 143,433 | 165,367 | +| 10 | PHP | 128,186 | 272,980 | +| 11 | Jupyter Notebook | 122,475 | 102,708 | +| 12 | Shell | 122,456 | 108,209 | +| 13 | C | 107,918 | 80,159 | +| 14 | Rust | 69,370 | 72,778 | +| 15 | Ruby | 66,857 | 374,835 | +| 16 | Kotlin | 64,307 | 62,709 | +| 17 | Vue | 56,099 | 170,639 | +| 18 | SCSS | 50,526 | 44,672 | +| 19 | Dart | 46,143 | 43,006 | +| 20 | Swift | 33,839 | 35,978 | + +From the table above: + +- The top five programming languages most used by developers are JavaScript, Python, HTML, TypeScript, and Java, which represent the leading programming languages developers use. Starting from the sixth-ranked CSS, the number of users decreased by nearly half compared to Java, the fifth-ranked language. + +#### 1.5.2 Trends in Programming Language Usage from 2019 to 2023 + +Statistical analysis of developers' programming language usage trends from 2019 to 2023 is depicted in the following figure. + +![1-16](/image/data/chapter_1/1-16.png) + +
Figure 1.17: Trends in Programming Language Usage from 2019 to 2023
+
+ +Observations from the figure: + +- JavaScript, Python, HTML, TypeScript, and Java are the leading programming languages developers use. +- Python and TypeScript have shown rapid growth compared to the other three primary languages and have maintained a consistently rapid growth trend over the past five years. +- TypeScript, in particular, has experienced rapid growth in the number of users over the past five years. In 2021, it significantly surpassed other programming languages, becoming one of the main programming languages developers use. Perhaps by 2024, the number of developers using it will be comparable to the number of developers using HTML, which is ranked third. + + + +## 2. OpenRank Rankings + +**Rankings** are a popular form of presenting analysis results. + +The 2023 China Open Source Annual Report separates the rankings into a dedicated section for centralized display. This is partly to showcase better the development trends of various entities (repositories/projects, countries/regions, enterprises, foundations, developers, etc.) in the open source ecosystem, and another important reason is the maturation of the OpenRank indicators and the completeness of global data. + +With the addition of global data from both GitHub and Gitee this year, we are able to take a global perspective with China's open source as the starting point, allowing the world to see the joint efforts and contributions of Chinese enterprises, foundations, developers, and other entities in developing the global open-source ecosystem, which is not available in other reports on the market. + +### 2.1 Global Open Source Repository OpenRank Rankings + +![2-1](/image/data/chapter_2/2-1.png) + +
Figure 2.1 Global Open Source Project OpenRank Rankings (Top 20)
+ +### 2.2 China Open Source Project OpenRank Rankings + +![2-2](/image/data/chapter_2/2-2.png) + +
Figure 2.2 China Open Source Project OpenRank Rankings (Top 20)
+
+ +> Chinese open-source projects are based on data from the OpenDigger project tags, and a single project may include multiple organizations or repositories on GitHub or Gitee platforms. + +### 2.3 Global Enterprise OpenRank Rankings + +![2-3](/image/data/chapter_2/2-3.png) + +
Figure 2.3 Global Enterprise OpenRank Rankings (Top 20)
+
+ +> Enterprise rankings are based on data from OpenDigger project tags, meaning the sum of all open source projects initiated by a certain enterprise's OpenRank, including projects donated to foundations. + +### 2.4 China Enterprise OpenRank Rankings + +![2-4](/image/data/chapter_2/2-4.png) + +
Figure 2.4 China Enterprise OpenRank Rankings (Top 20)
+ +### 2.5 Global Foundation OpenRank Rankings + +![2-5](/image/data/chapter_2/2-5.png) + +
Figure 2.5 Global Foundation OpenRank Rankings (Top 10)
+ +### 2.6 Country and Region OpenRank Rankings + +![2-6](/image/data/chapter_2/2-6.png) + +
Figure 2.6 Country and Region OpenRank Rankings (Top 20)
+
+ +> Country and region data is based on location information filled in by GitHub developers, with a sample size of the top 10 million OpenRank users globally. + +### 2.7 Global Developer OpenRank Rankings + +![2-7](/image/data/chapter_2/2-7.png) + +
Figure 2.7 Global Developer OpenRank Rankings (Top 30)
+ +### 2.8 China Developer OpenRank Rankings + +![2-8](/image/data/chapter_2/2-8.png) + +
Figure 2.8 China Developer OpenRank Rankings (Top 30)
+
+ +> Chinese developer accounts are based on OpenDigger tag data. + +## 3. Enterprise Insights + +Enterprises are the core force driving the development of the global open-source ecosystem. They are initiators, as well as developers and maintainers, at the forefront of the development and commercial exploration of open-source projects. + + +### 3.1 Evolution of Global Enterprise OpenRank Over the Past 10 Years + +![3-1](/image/data/chapter_3/3-1.png) + + +![3-1](/image/data/chapter_3/3-1.png) + +
Figure 3.1 Changes in Global Enterprise OpenRank Rankings
+
+ +Observations on the global impact of enterprise open source are as follows: + +- Microsoft began laying out open source over a decade ago (in 2008) and reached the pinnacle of global open source influence in 2016, a position it has held unchallenged to this day. +- Since being officially sanctioned by the United States in 2019, Huawei has made open source a strategic priority. It has been soaring ever since and surpassed Google and Amazon this year. +- Alibaba has been a leader in domestic open source until 2021 and has maintained its sixth position globally. +- Ant Group's performance in the past three years has been remarkable, and it officially entered the top ten in the world in 2023. +- Baidu, the fourth largest player in domestic open source, has fallen to 12th globally due to rapid changes in the domestic open source landscape. +- According to the [OpenLeaderboard](https://open-leaderboard.x-lab.info/), Chinese enterprises entering the top 30 globally also include ByteDance (18), PingCAP (19), Feizhiyun (24), Deepin (25), Tencent (26), and Espressif (27). + +### 3.2 Evolution of China Enterprise OpenRank Over the Past 10 Years + +![3-2](/image/data/chapter_3/3-2.png) + +
Figure 3.2 Changes in China Enterprise OpenRank Rankings
+
+ +This chart effectively demonstrates the open-source strategies of domestic companies and their changing trends: + +- Huawei began to make efforts in 2019 and, in just two years, achieved first place in China and second place globally. +- As traditional domestic leaders in open source, Alibaba and Ant have shown stable performance. +- Baidu has slipped to fourth place due to competition from the first three. +- ByteDance has made visible and rapid progress in recent years. +- Espressif (Espressif Systems) is a relatively low-profile semiconductor open-source leader in China. +- Fit2Cloud is another low-key but pragmatic open-source enterprise, with several open-source software under its belt being highly favored by developers. +- Tencent, PingCAP, JD, and TAOS have shown a slight downward trend in the past two years, indicating that competition in the post-pandemic era will intensify. + + +### 3.3 Proportion of China Enterprises' OpenRank on GitHub/Gitee Platforms + +
+ 3-3 + 3-4 +
+ +
Figure 3.3 Proportion of China Enterprises' OpenRank among Global Enterprises (Left) and Comparison of OpenRank between Chinese and American Enterprises at the Project Level (Right)
+
+ +The left chart shows the trend of increasing influence of Chinese enterprises in the global open source ecosystem, while the right chart reflects the trend of ups and downs between China and the United States in the post-trade war era, especially after the pandemic. The influence of Chinese open source has risen significantly, as has the influence of companies like Huawei. However, it can also be seen that the gap between Chinese and American enterprises in overall open source influence is still significant (about 3 times the difference). Still, this momentum is very promising for the future. + +## 4. Foundations Insights + +This section examines the development of open-source ecology from a foundation perspective. Foundations are non-profit organizations that play a crucial role in organizing, developing, and innovating open-source projects and communities. They provide comprehensive support in technology, operations, and law to incubate open-source software and guide the building and operation of open-source communities. Foundations act as incubators and accelerators and are essential organizers of the open-source ecosystem. This year, we have included a separate section on insights from open-source foundations, where we can see the global impact of China's open-source foundations. + +### 4.1 Global Foundation OpenRank trend analysis + +
+ + + +
+
Figure 4.1 Global Foundation OpenRank Overall Trend
+
+ +The following trends can be seen: + +- The Apache Foundation's #1 ranking has evolved at a mature and steady pace, and today it remains the first choice for many companies to develop globalization projects; +- The OpenAtom Open Source Foundation was founded just over three years ago; its projects have developed rapidly, and their total influence has surpassed that of the Linux Foundation's sub-foundations, ranking second only to the Apache Foundation; +- LF AI & Data ranked third, outpacing CNCF in cloud-native due to advancements in AI; +- The development of the other (sub)foundations has generally been relatively stable. + +### 4.2 Global Foundation project OpenRank trend analysis + +
+ +
+
Figure 4.2 Global Foundation Project OpenRank Trends
+
+ +In terms of open source projects under global foundations: + +- Kubernetes continues to rank first, but its influence declines every year, giving way to projects in emerging areas; +- Doris, an open source real-time data warehouse initiated by Baidu under the Apache Foundation, has grown rapidly in recent years and ranks second; +- OpenHarmony, a project of the OpenAtom Open Source Foundation, and its various sub-repositories follow closely behind. If combined, they would rank #1. + +### 4.3 Analysis of Trends in OpenRank Projects under Foundation in China + +
+ +
+
Figure 4.3 Trends in OpenRank Projects under Foundation in China
+
+ +Chinese projects under various foundations are examined separately: + +- Doris and OpenHarmony are developing most noticeably; +- The Milvus Vector Database has experienced rapid growth due to demand in the AIGC domain; +- Projects like Flink and ShardingSphere are relatively stable. + +### 4.4 Analysis of Trends in OpenRank Projects under the Open Atom Foundation + +
+ +
+
Figure 4.4 Trends in OpenRank Projects under the Open Atom Foundation
+
+ +This year marks the first time we can observe the development of projects under the Open Atom Flag: + +- The top three are OpenHarmony, openEuler, and Anolis, representing the absolute status of the operating system, especially OpenHarmony, which is developing the fastest; +- Other listed projects are developing steadily, and we look forward to their progress in the new year. + + +## 5. Technological insights + +The technology field is rapidly evolving, especially in various subfields. **Operating systems** are being developed in new architectures, **cloud native** are driving digital transformation, **databases** are becoming the infrastructure for data innovation, **big data** is facilitating intelligent decision-making, **artificial intelligence** is accelerating automation in various industries, and **front-end** technologies are focusing on interaction and aesthetics. These areas are at the forefront of technology, attracting innovators and investors and creating a booming trend. In this section, we will provide insights into these six areas in terms of two metrics: influence and activity. + +### 5.1 Overall development trend of six major technology areas in the past five years + +![5-1](/image/data/chapter_5/5-1.png) + +
Figure 5.1 Trends in OpenRank by subfield over the last 5 years
+ +![5-2](/image/data/chapter_5/5-2.png) + +
Figure 5.2 Trends in activity by subfield over the past five years
+
+ +Cloud-native computing and artificial intelligence (AI) have gained popularity in the past five years, reflected in their increased number of repositories. Databases remain critical, while the influence of front-end development is shrinking. Operating systems have a smaller number of repositories but hold great value. + +### 5.2 5-Year Trends in OpenRank and Activity for the Top 10 Projects in Each Technology Area + +#### 5.2.1 Cloud Native + +![5-3](/image/data/chapter_5/5-3.png) + +
Figure 5.3 Trends in the Cloud-Native Top 10 OpenRank Projects over the Last Five Years
+ +![5-4](/image/data/chapter_5/5-4.png) + +
Figure 5.4 Cloud-Native Top 10 Active Project Trends in the Last Five Years +
+
+ +Both indicators of Kubernetes have significantly decreased, while Grafana has emerged as the top influencer. The llvm-project has shown remarkable growth and has become the most active project in the past three years. LLVM is a compiler framework that comprises a collection of modular and reusable compiler as well as toolchain technologies. Its rapid growth in popularity among developers is a testament to its effectiveness. + +#### 5.2.2 Artificial intelligence + +![5-5](/image/data/chapter_5/5-5.png) + +
Figure 5.5 Trends in the AI Top 10 OpenRank Projects over the Last Five Years
+ +![5-6](/image/data/chapter_5/5-6.png) + +
Figure 5.6 Artificial Intelligence Top 10 Active Project Trends in the Last Five Years +
+
+ +TensorFlow has been declining and is out of the top 5, while PyTorch is growing and widening the gap. LangChain, an open-source software project by Harrison Chase, has been in second place in both indicators since its launch in October 2022 and is now one of the most popular frameworks for LLM development. + +#### 5.2.3 Big Data + +![5-7](/image/data/chapter_5/5-7.png) + +
Figure 5.7 Trends in the Big Data Top 10 OpenRank Projects in the Last Five Years
+ +![5-8](/image/data/chapter_5/5-8.png) + +
Figure 5.8 Big Data Top 10 Active Projects Trends in the Last 5 Years
+
+ +Kibana and Grafana are the top two big data solutions, with a consistent upward trend. Grafana is predicted to surpass Kibana and become the top-ranked solution in the future. + +Kibana is an open-source tool for data visualization and exploration, tightly integrated with ElasticSearch. + +Grafana is an open-source tool for monitoring and reporting. It can visualize data from various sources, including Prometheus, InfluxDB, and Graphite, among others. Grafana's data processing and visualization features enable the creation of different charts and dashboards. + +#### 5.2.4 Database + +![5-9](/image/data/chapter_5/5-9.png) + +
Figure 5.9 Trends in the Database Top 10 OpenRank Projects over the Last Five Years +
+ +![5-10](/image/data/chapter_5/5-10.png) + +
Figure 5.10 Database Top 10 Active Project Trends in the Last Five Years
+
+ +Doris is the fastest-growing database, with activity metrics nearing the top spot, while ElasticSearch is dropping back in popularity. It is predicted that Doris will surpass ClickHouse in the future. + +ClickHouse is an open source MPP architecture designed by Yandex. It analyzes large amounts of data and is claimed to be 100-1000x faster than traditional databases. Key feature: high-performance vectorized execution engine. Also known for rich functionality and reliability. + +Apache Doris is an open-source MPP analytical database contributed by Baidu; its distributed architecture is simple and easy to operate and maintain. + +#### 5.2.5 Frontend + +![5-11](/image/data/chapter_5/5-11.png) + +
Figure 5.11 Trends in the Frontend Top 10 OpenRank Projects over the Last Five Years
+ +![5-12](/image/data/chapter_5/5-12.png) + +
Figure 5.12 Frontend Top 10 Active Project Trends in the Last Five Years
+
+ +While declining in both indicators year over year, Flutter still has a clear advantage over Next.js, which started to gain momentum in 2023 and is rising significantly. The 3-10 ranked programs are highly competitive, with little gap between them. + +Flutter is a framework developed and supported by Google. Front-end and full-stack developers use Flutter to build the user interface of applications for multiple platforms with a single code base. + +Next.js is an open source platform created by Vercel, built with Node.js and Babel translators and designed for use with React Single Page Application Framework. In addition, Next.js provides many useful features, such as preview mode, rapid developer compilation and static export. + +#### 5.2.6 Operating system + +![5-13](/image/data/chapter_5/5-13.png) + +
Figure 5.13 Trends in the Operating System Top 10 OpenRank Projects over the Last Five Years
+
+ +![5-14](/image/data/chapter_5/5-14.png) + +
Figure 5.14 Operating System Top 10 Active Project Trends in the Last Five Years
+
+ +As you can see, several repositories under the OpenHarmony project are in the top 10 list. This insight combines data from the Gitee platform so you can more intuitively see the advantages of domestic operating systems in various aspects (there are several repositories under the OpenHarmony project, and this insight analyzes them in terms of repositories). SerenityOS has fallen back a bit since 2021 and is second only to OpenHarmony and OpenEuler, which also have good performance. + +### 5.3 OpenRank Top 10 list for each field in 2023 + +Below are the OpenRank rankings for projects in each field for 2023. + +#### 5.3.1 Cloud Native + +Table 5.1 Top Projects in Cloud Native + +| Number | Project Name | OpenRank | +| :----: | :--------------------: | :------: | +| 1 | grafana/grafana | 7134.37 | +| 2 | llvm/llvm-project | 7049.62 | +| 3 | kubernetes/kubernetes | 5374.14 | +| 4 | ClickHouse/ClickHouse | 4941.99 | +| 5 | cilium/cilium | 3215.42 | +| 6 | ceph/ceph | 3172.49 | +| 7 | keycloak/keycloak | 3095.56 | +| 8 | gravitational/teleport | 3082.18 | +| 9 | envoyproxy/envoy | 2929.08 | +| 10 | backstage/backstage | 2903.39 | + +#### 5.3.2 Artificial Intelligence + +Table 5.2 Top Projects in Artificial Intelligence + +| Number | Project Name | OpenRank | +| :----: | :----------------------------------: | :------: | +| 1 | pytorch/pytorch | 10182.45 | +| 2 | langchain-ai/langchain | 6080.25 | +| 3 | PaddlePaddle/Paddle | 5408.62 | +| 4 | huggingface/transformers | 4422.84 | +| 5 | AUTOMATIC1111/stable-diffusion-webui | 3881.6 | +| 6 | openvinotoolkit/openvino | 3857.31 | +| 7 | microsoft/onnxruntime | 3006.75 | +| 8 | tensorflow/tensorflow | 2723.26 | +| 9 | Significant-Gravitas/AutoGPT | 2664.85 | +| 10 | ggerganov/llama.cpp | 2339.8 | + +#### 5.3.3 Big Data + +Table 5.3 Top Projects in Big Data + +| Number | Project Name | OpenRank | +| :----: | :-------------------: | -------- | +| 1 | elastic/kibana | 7601.04 | +| 2 | grafana/grafana | 7134.37 | +| 3 | 
ClickHouse/ClickHouse | 4941.99 | +| 4 | airbytehq/airbyte | 4658.86 | +| 5 | apache/doris | 4307.26 | +| 6 | elastic/elasticsearch | 3729.39 | +| 7 | apache/airflow | 3642.9 | +| 8 | StarRocks/starrocks | 3194.56 | +| 9 | trinodb/trino | 2703.4 | +| 10 | apache/spark | 2654.02 | + +#### 5.3.4 Database + +Table 5.4 Top Projects in Database + +| Number | Project Name | OpenRank | +| :----: | :-------------------: | :------: | +| 1 | ClickHouse/ClickHouse | 4941.99 | +| 2 | apache/doris | 4307.26 | +| 3 | elastic/elasticsearch | 3729.39 | +| 4 | cockroachdb/cockroach | 3443.7 | +| 5 | StarRocks/starrocks | 3194.56 | +| 6 | trinodb/trino | 2703.4 | +| 7 | apache/spark | 2654.02 | +| 8 | pingcap/tidb | 2200.38 | +| 9 | milvus-io/milvus | 2001.11 | +| 10 | yugabyte/yugabyte-db | 1940.75 | + +#### 5.3.5 Frontend + +Table 5.5 Top Projects in Frontend + +| Number | Project Name | OpenRank | +| :----: | :-------------------: | :------: | +| 1 | flutter/flutter | 9361.81 | +| 2 | vercel/next.js | 6638.65 | +| 3 | appsmithorg/appsmith | 3474.07 | +| 4 | nuxt/nuxt | 3387.23 | +| 5 | facebook/react-native | 3260.55 | +| 6 | ant-design/ant-design | 3053.25 | +| 7 | nodejs/node | 2736.37 | +| 8 | angular/angular | 2273.82 | +| 9 | electron/electron | 1773.31 | +| 10 | denoland/deno | 1654.01 | + +#### 5.3.6 Operating system + +Table 5.6 Top Projects in Operating System + +| Number | Project Name | OpenRank | +| :----: | :---------------------------------------------------------------------------: | :------: | +| 1 | openharmony/docs | 3277.69 | +| 2 | openharmony/arkui_ace_engine | 2818.09 | +| 3 | SerenityOS/serenity | 2257.68 | +| 4 | openharmony/graphic_graphic_2d | 1239.6 | +| 5 | openeuler/docs | 1206.9 | +| 6 | openharmony/xts_acts | 1186.06 | +| 7 | openharmony/arkcompiler_ets_runtime | 961.99 | +| 8 | openharmony/interface_sdk-js | 910.91 | +| 9 | reactos/reactos | 745.23 | +| 10 | armbian/build | 679.1 | + +## 6. 
Insights on open source projects + +In 2023, large AI models like GPT-4 and CLIP emerged, leading to competition among global enterprises to invest in research and development for cutting-edge technologies like language understanding and image generation. The industry saw rapid evolution, marking the beginning of a new era in the broad application of AI. The database field experienced a trend of innovation with various technologies like distributed databases, time-series databases, and graph databases emerging to cater to different application scenarios. Cloud-native databases became popular, offering flexible scaling and high availability. This section provides data insights on project types by statistically analyzing project topics. In-depth insights are also provided into the two core areas of database and AI. + +### 6.1 Type of project + +This subsection selects the top 10,000 active GitHub repositories for statistical analysis. + +#### 6.1.1 Ratios for different project types + +6-1 + +
Figure 6.1 Ratios for different project types
+
+ +- Software development primarily comprises components and frameworks (libraries and frameworks), which constitute 31.36% of it. Developers enjoy using these open-source collaborative innovations, which are the most popular types to contribute to; +- The Application Software category is second only to the Component Framework category (24.34%) due to its utility, enabling all users (not just developers) to utilize open source software in a variety of industries and domains; +- Non-Software content holds a significant share of 23.17%. It shows the growing trend of open-source as a collaborative development model that extends to the entire content domain, including documentation, education, art, hardware, and other non-programming-related areas; +- Developers find the Software Tools category valuable as it allows them to focus on building software applications and products, making up 18.9% of their work; +- The System Software category comprises fundamental software, accounting for only 2.3% of the total despite its immense value and complexity. + +#### 6.1.2 Percentage of OpenRank by Project Type + +6-2 + +
+
Figure 6.2 Percentage of OpenRank by Project Type
+
+ +Let's take this a step further and look at these categories through the lens of OpenRank influence: + +- The most significant change is that content resource type (Non-Software) projects have relatively low impact, although they have high activity; +- System Software, on the other hand, has a small percentage of activity but a relatively large percentage of influence, and a similar phenomenon can be observed with Software Tools projects; +- The component framework type and the application software type have not changed much, and both are among the more prevalent types. + +#### 6.1.3 OpenRank Trends by Project Type in the Last 5 Years + +6-3 + +
+
Figure 6.3 OpenRank Trends by Project Type in the Last 5 Years
+
+ +As you can see from the five-year OpenRank evolution chart above, the influence of the System Software category is increasing year by year, while the influence of the Non Software category is decreasing. + +### 6.2 Project Topic Analysis + +This section also analyzes the top 10,000 active GitHub repositories and obtains insights from the Topic tags under the repositories. + +#### 6.2.1 Top Topic + +6-4 + +Figure 6.4 Top 10 appearances of Topic
+ +The top 10 topics cover a diverse range of areas, demonstrating the broad interest of the open-source community. JavaScript, Hacktoberfest, and Python are some of the most popular topics, representing hotspots for cutting-edge technologies, active community activities, and versatile programming languages. These topics highlight the interest in front-end development, open-source contributions, and interdisciplinary programming. + +#### 6.2.2 Overall OpenRank Trends for Repositories of Popular Topics + +
6-5
+ +Figure 6.5 OpenRank trends for repositories with top 10 Topic occurrences (2019 - 2023) +
+ +- Hacktoberfest is an annual event that takes place in October. It aims to promote the open-source community and is organized by DigitalOcean in collaboration with GitHub. The goal of the event is to encourage more people to participate in open-source projects and contribute to the community. OpenRank is used to measure people's enthusiasm for open-source projects, community involvement, and contributions. Developers play an active role in the campaign by submitting Pull Requests to open-source projects, thus helping to increase the reputation and influence of the repository. +- JavaScript and Python: these technologies have maintained relatively stable trends over the past few years, with no significant growth or decline. + +### 6.3 Project analysis in databases + +This section uses information from open-source databases, which are disclosed in the [Database of Databases](https://dbdb.io/) and [DB-Engines Ranking](https://db-engines.com/en/ranking). The field is divided into 18 subcategories based on the storage structure and usage of databases. These subcategories include Relational, Key-value, Document, Search Engine, Wide Column, Time Series, Graph, Vector, Object Oriented, Hierarchical, RDF, Array, Event, Spatial, Native XML, Multivalue, Content, and Network. We then collect and analyze corresponding database information on GitHub. We examine the corresponding open-source projects for each database and gather and analyze their collaboration log data on GitHub. This helps us gain detailed insights into the field. 
+ +#### 6.3.1 2023 OpenRank and Activity Lists by Subdomain in the Database Domain + +**1, OpenRank Rankings for Database Subdomains** + +Table 6.1 OpenRank Rankings for Database Subdomains + +| Ranking | Subfield Name | OpenRank | +| :-----: | :-------------: | :------: | +| 1 | Relational | 58092.36 | +| 2 | Key-value | 21834.08 | +| 3 | Document | 17264.93 | +| 4 | Search Engine | 8093.77 | +| 5 | Wide Column | 7896.43 | +| 6 | Time Series | 7813.54 | +| 7 | Graph | 5196.52 | +| 8 | Vector | 4965.41 | +| 9 | Object Oriented | 3104.07 | +| 10 | Hierarchical | 1355.4 | +| 11 | RDF | 592.68 | +| 12 | Array | 383.95 | +| 13 | Event | 256.59 | +| 14 | Spatial | 224.05 | +| 15 | Native XML | 209.51 | +| 16 | Multivalue | 15.89 | +| 17 | Content | 3.43 | + +**2, Activity Rankings for Database Subdomains** + +Table 6.2 Activity Rankings for Database Subdomains + +| Ranking | Subfield Name | Activity | +| :-----: | :-------------: | :-------: | +| 1 | Relational | 161025.44 | +| 2 | Key-value | 62501.64 | +| 3 | Document | 49400.11 | +| 4 | Search Engine | 23799.87 | +| 5 | Time Series | 22077.57 | +| 6 | Wide Column | 21292.17 | +| 7 | Vector | 16395.88 | +| 8 | Graph | 14947.43 | +| 9 | Object Oriented | 8418.14 | +| 10 | Hierarchical | 3406.55 | +| 11 | RDF | 1701.67 | +| 12 | Array | 1280.14 | +| 13 | Native XML | 737.94 | +| 14 | Spatial | 680.79 | +| 15 | Event | 654.42 | +| 16 | Content | 33.94 | +| 17 | Multivalue | 12.68 | + +The OpenRank and activity rankings for 2023 for each sub-domain of the database domain show that: + +- Relational, key-value, and document databases are the top three subdomains, accounting for over 70% of the database domain; +- Relational's two indicators exceeded those of the second through fifth-place finishers combined and accounted for more than 40 percent of the database field, making it a mega-subcategory. 
+ +#### 6.3.2 Trends over the last five years in projects under the various subfields of the database area + +![6-6](/image/data/chapter_6/6-6.png) + +Figure 6.6 Trends in OpenRank by Subdomain in Database Domain (2019 - 2023) + +![6-7](/image/data/chapter_6/6-7.png) + +Figure 6.7 Trends in Activity by Subdomain in Database Domain (2019 - 2023) + +The trend of OpenRank and the trend of activity of projects in each subdomain of the database domain over the past five years shows that: + +- Over the past five years, Relational, Key-value, and Document have consistently ranked in the top three in both indicators; +- Search Engine, Wide Column, Time Series, Graph, Vector, and Object Oriented ranked fourth through ninth, with both indicators trending upward; +- Search Engine and Vector subcategories have shown a fast growth rate. Search Engines have jumped two positions to become the fourth largest subcategory. Vector is still competing with the Graph subcategory and has the potential to improve its OpenRank. The influence created by the large model has not yet subsided, and it is predicted that Vector will overtake Graph by 2024. + +#### 6.3.3 Open source quadrant map of projects under each sub-domain of the database domain + +There are three metrics involved in the Open Source Quadrant diagram: Activity, Openrank, and CommunityVolume. CommunityVolume is the same formula as the Attention metric in open-digger, i.e. a weighted sum of the number of stars and the number of forks of the target project in a given period of time:`sum(1*star+2*fork)`. + +Quadrant plotting methods: + +1. Select the Top 10 projects by activity for each database subcategory; +2. Make a `log(x)-log(y)` scatterplot of `log(openrank)-log(communityvolume)`, the base of the log is 2, denote the number of half-lives required for the spatial influence openrank and the temporal influence communityvolume to decay to 1, respectively. +3. 
The vertical line corresponding to the mean value of the horizontal coordinates of all points on the graph is used as the vertical axis, and the horizontal line corresponding to the mean value of the vertical coordinates of all points on the graph is used as the horizontal axis to divide into four quadrants. + +There are a total of 18 subcategory labels in the database domain, and the top 9 categories that account for more than 1% of activity in 2023 were selected for statistical analysis to map the open source quadrant as follows: + +
+ +
+ +
Figure 6.8 Relational Database OpenRank-CommunityVolume log-log Open Source Quadrant Map

+ +
+ +
+ +
Figure 6.9 Key-Value Database OpenRank-CommunityVolume log-log Open Source Quadrant Map

+ +
+ +
+ +
Figure 6.10 Document-based databases OpenRank-CommunityVolume log-log Open Source Quadrant Chart

+ +
+ +
+ +
Figure 6.11 Search Engine OpenRank-CommunityVolume log-log Open Source Quadrant Chart

+ +
+ +
+ +
Figure 6.12 Time series database OpenRank-CommunityVolume log-log Open Source Quadrant Chart

+ +
+ +
+ +
Figure 6.13 Wide column database OpenRank-CommunityVolume log-log Open Source Quadrant Chart

+ +
+ +
+ +
Figure 6.14 Vector database OpenRank-CommunityVolume log-log Open Source Quadrant Chart

+ +
+ +
+
Figure 6.15 Graph database OpenRank-CommunityVolume log-log Open Source Quadrant Chart
+
+ +
+ +
+
Figure 6.16 Object-oriented database OpenRank-CommunityVolume log-log Open Source Quadrant Chart
+
+ +
+ +
+
Figure 6.17 Top 9 Subcategory Databases by Activity OpenRank-CommunityVolume log-log Open Source Quadrant Chart
+
+ +The search engine category is highly polarized, with projects like Elasticsearch with high OpenRank and CommunityVolume, and projects like Sphinx and Xapian with very low OpenRank and CommunityVolume. + +From the first quadrant: relational, document, search engine, and vector are all database types with strong OpenRank influence and CommunityVolume focus, while object-oriented is relatively weak in both areas. + +The Open Source Quadrant plot shows the vertical distribution of the Top 9 subclasses of databases in terms of activity. Among these subclasses, two stand out - search engine and vector. These two subclasses have a higher community volume than OpenRank, which means they have more active contributors. They also have a higher community voice, meaning their opinions and feedback are more valued. Additionally, they are known for faster development expectations compared to the other subclasses. + +### 6.4 Project Analysis of Generative AI Area + +This section will examine the open-source projects related to generative AI, using the [Generative AI Open Source (GenOS) Index](https://www.decibel.vc/articles/launching-the-generative-ai-open-source-genos-index) as a reference point. We will classify these projects into four subcategories: tools, models, applications, and infrastructure. The detailed insights are outlined below: + +#### 6.4.1 Growth trends in subfields of generative AI over the past five years + +6-8 + +
Figure 6.18 OpenRank Trends in Generative AI by Subdomain, 2019 - 2023
+
+ +6-9 + +
Figure 6.19 Activity Trends in Generative AI by Subdomain, 2019 - 2023
+
+ +- Categorization analysis of activity and influence across models, tools, apps, and infrastructure reveals consistent trends; +- AIGC open source projects in the modeling category are more influential and active than those in the tools and applications categories; +- The modeling category has grown rapidly since 2022 and surpassed Infrastructure in 2023. AIGC's innovative application development had a significant breakthrough in 2023, leading to concurrent application growth. + +#### 6.4.2 Trends in OpenRank and Activity Top 10 for Projects in the Generative AI Domain + +6-10 + +
Figure 6.20 5-Year Trend of OpenRank Top 10 Projects in Generative AI
+
+ +6-11 + +
Figure 6.21 5-Year Trend of the Top 10 Active Projects in Generative AI
+
+ +- langchain is ranked #1 in terms of influence and activity and is highly regarded by developers; +- transformers was the reigning champion in the AIGC field for the past few years, and its position remained unchallenged until 2023. This project has significantly impacted both the academic and open-source communities, showcasing its groundbreaking capabilities; +- stable-diffusion-webui is an AIGC tool that has gained a lot of attention from developers. It has surpassed transformers in terms of activity and is likely to surpass it in terms of influence by 2024; +- Several AIGC projects that were open-sourced in 2023 have already gained significant influence and activity, placing them on the Top 10 list. This highlights the rapid pace of change in the field of AIGC. + +#### 6.4.3 Top 10 List of OpenRank and Activity of Projects in Generative AI in 2023 + +**1. List of OpenRank Top 10 Projects in Generative AI** + +
Table 6.3 OpenRank Rankings in Generative AI +
+
+ +| Ranking | Project Name | OpenRank | +| :-----: | :----------------------------------------: | :------: | +| 1 | langchain-ai/langchain | 6080.25 | +| 2 | huggingface/transformers | 4422.84 | +| 3 | AUTOMATIC1111/stable-diffusion-webui | 3881.6 | +| 4 | Significant-Gravitas/AutoGPT | 2664.85 | +| 5 | ggerganov/llama.cpp | 2339.8 | +| 6 | oobabooga/text-generation-webui | 2242.5 | +| 7 | milvus-io/milvus | 2001.11 | +| 8 | run-llama/llama_index | 1913.01 | +| 9 | facebookincubator/velox | 1589.53 | +| 10 | invoke-ai/InvokeAI | 1571.45 | + +**2. List of Top 10 Active Projects in Generative AI** + +
Table 6.4 Activity Rankings in Generative AI +
+
+ +| Ranking | Project Name | Activity | +| :-----: | :----------------------------------------: | :------: | +| 1 | langchain-ai/langchain | 22563.04 | +| 2 | AUTOMATIC1111/stable-diffusion-webui | 13933.03 | +| 3 | huggingface/transformers | 13618.11 | +| 4 | Significant-Gravitas/AutoGPT | 10961.81 | +| 5 | oobabooga/text-generation-webui | 8597.33 | +| 6 | ggerganov/llama.cpp | 8108.62 | +| 7 | run-llama/llama_index | 7532.47 | +| 8 | milvus-io/milvus | 6488.35 | +| 9 | facebookincubator/velox | 4923.05 | +| 10 | Chatchat-space/Langchain-Chatchat | 4477.63 | + +## 7. Developer Insights + +**Developers** are vital to open-source innovation. They create and supply open-source projects and contribute significantly to them. The total number of developers and their collaboration mechanism impact the amount of contribution. In this section, we will analyze data on individual developers at national and regional levels. + +### 7.1 Geographical distribution of developers + +This analysis, like the one in Section 1.3, is based on 10 million active GitHub developers. Out of the 100 million registered users on GitHub, only 2 million developers have provided accurate geolocation information, which makes up a 2% sample. + +**1. GitHub Active Developers Distribution Map** + +The number of active developers on GitHub was first visualized on a map, as shown below. + +![7-1.png](/image/data/chapter_7/7-1.png) + +Figure 7.1 2023 GitHub Active Developers Distribution Map +
+ +GitHub developers are concentrated in areas with large populations and fast internet development, such as coastal regions of China, Europe, the United States, India, and the southeast coast of Brazil. They are sparsely distributed in other areas with small populations or less developed internet. + +**2. GitHub Active Developers by Country / Region** + +![7-2.png](/image/data/chapter_7/7-2.png) + +
Figure 7.2 GitHub Active Developers by Country / Region +
+
+ +Table 7.1 2023 Ranking of Countries/Regions by Number of Active Developers +
+ +| Ranking | Country / Region | Number of active developers | +| :-----: | :------------: | :--------------: | +| 1 | United States | 236899 | +| 2 | China | 113893 | +| 3 | India | 107066 | +| 4 | Brazil | 83932 | +| 5 | Germany | 64836 | +| 6 | United Kingdom | 55175 | +| 7 | Canada | 42238 | +| 8 | France | 40341 | +| 9 | Russia | 31534 | +| 10 | Japan | 21942 | + +The United States has the largest number of developers, followed by China, India and Brazil, while other countries with a certain population and economic level, such as Canada and some European countries, also have a large number of developers on GitHub. + +**3. Distribution of Active Developers on GitHub in China** + +The graph below visualizes the distribution of the number of active developers on GitHub on a map. + +![7-4.png](/image/data/chapter_7/7-4.png) + +
Figure 7.3 2023 Distribution of Active Developers in China
+
+ +Table 7.2 2023 Regional Ranking of Active Developers in China +
+ +| Ranking | Regions | Quantity | +| :-----: | :-------: | :------: | +| 1 | Beijing | 24151 | +| 2 | Shanghai | 18215 | +| 3 | Guangdong | 16153 | +| 4 | Zhejiang | 10927 | +| 5 | Taiwan | 8823 | +| 6 | Jiangsu | 5437 | +| 7 | Sichuan | 5311 | +| 8 | Hong Kong | 3344 | +| 9 | Hubei | 3273 | +| 10 | Shaanxi | 1993 | + +Beijing is found to have the most GitHub users in China, followed by Shanghai, Guangdong, and Zhejiang. Most of China's active GitHub users are in the eastern coastal regions, while some central provinces such as Shaanxi, Hunan, and Hubei also have a lot of active users, and it's worth noting that Sichuan has the most active GitHub users outside of the coastal regions. + +**4. GitHub China Developer Influence Distribution after OpenRank Weighting** + +Aggregating the OpenRank values of the developers in each region, we get the influence distribution map and regional ranking of Chinese developers, as shown in the following graph. + +![7-3.png](/image/data/chapter_7/7-3.png) + +
Figure 7.4 OpenRank influence distribution of Chinese developers
+
+ +Table 7.3 OpenRank Influence Ranking in China +
+ +| Ranking | Regions | OpenRank | +| :-----: | :-------: | :-------: | +| 1 | Beijing | 506624.08 | +| 2 | Shanghai | 435804.42 | +| 3 | Guangdong | 306014.24 | +| 4 | Zhejiang | 274284.92 | +| 5 | Taiwan | 216991.49 | +| 6 | Sichuan | 96881.79 | +| 7 | Jiangsu | 83321.13 | +| 8 | Hong Kong | 83238.46 | +| 9 | Hubei | 51370.74 | +| 10 | Fujian | 33482.25 | + +As you can see from the rankings, the OpenRank regional rankings are highly consistent with the regional rankings for the number of active developers: + +- There are significant regional differences in terms of the influence of Chinese developers. Developers from Beijing and Shanghai dominate the first class, while developers from Guangdong, Zhejiang, and Taiwan fall into the second class. These regions have a different level of influence compared to those ranked lower; +- The overall number of active people in Sichuan is smaller than in Jiangsu, but the overall influence is greater, and the same phenomenon occurs in Fujian and Shaanxi. + +### 7.2 Developer Working Hours Analysis + +This section analyzes the working hours of GitHub and Gitee developers. By default, the time is in the UTC zone, which is 8 hours behind the East Eighth Time Zone (UTC+8), i.e., Beijing Standard Time. The data is scaled to the [1-10] range by default using the min-max method, with larger dots representing higher values in the time zone graph. + +#### 7.2.1 Distribution of working hours of global developers + +**Distribution of working hours of GitHub-wide developers** + +According to statistics on developers' working hours across GitHub, the majority of developers work between 6:00 and 21:00. There is a higher concentration of developers working at 12 o'clock, likely due to timed tasks. Weekends (Saturdays and Sundays) are relatively inactive. + +![7-5.png](/image/data/chapter_7/7-5.png) + +
Figure 7.5 GitHub-wide developer working hours in 2023
+
+ +**Distribution of working hours of Gitee-wide developers** + +![7-6.png](/image/data/chapter_7/7-6.png) + +
Figure 7.6 Gitee-wide developer working hours in 2023
+
+ +The Gitee data clearly aligns more with the East Eighth Time Zone's work time routine. + +**Global developer working hours distribution, excluding bots** + +![7-7.png](/image/data/chapter_7/7-7.png) + +
Figure 7.7 2023 Global Developers' Working Hours, Excluding Robots
+
+ +After removing the bot data, it is found that developer activity is concentrated in the interval of 6:00 - 21:00 and is more evenly distributed. + +#### 7.2.2 Distribution of working hours on the project + +Below is a comparison of the working hours distribution of the top four Chinese OpenRank repositories and the top four global OpenRank GitHub repositories in 2023. + +Distribution of working hours on the top four OpenRank projects in the global GitHub repository + +1. NixOS/nixpkgs + +![7-8.png](/image/data/chapter_7/7-8.png) + +
Figure 7.8 NixOS/nixpkgs Working Hours in 2023
+
+ +2. home-assistant/core + +![7-9.png](/image/data/chapter_7/7-9.png) + +
Figure 7.9 home-assistant/core Working Hours in 2023
+
+ +3. microsoft/vscode + +![7-10.png](/image/data/chapter_7/7-10.png) + +
Figure 7.10 microsoft/vscode Working Hours in 2023
+
+ +4. MicrosoftDocs/azure-docs + +![7-11.png](/image/data/chapter_7/7-11.png) + +
Figure 7.11 MicrosoftDocs/azure-docs Working Hours in 2023
+
+ +**Distribution of working hours of the top 4 OpenRank repositories in China** + +1. OpenHarmony + +![7-12.png](/image/data/chapter_7/7-12.png) + +
Figure 7.12 OpenHarmony Working Hours in 2023
+
+ +2. openEuler + +![7-13.png](/image/data/chapter_7/7-13.png) + +
Figure 7.13 openEuler Working Hours in 2023
+
+ +3. PaddlePaddle + +![7-14.png](/image/data/chapter_7/7-14.png) + +
Figure 7.14 PaddlePaddle Working Hours in 2023
+
+ +4. MindSpore + +![7-15.png](/image/data/chapter_7/7-15.png) + +
Figure 7.15 MindSpore Working Hours in 2023
+
+ +### 7.3 Developer Role Analysis + +This section categorizes GitHub users into four roles: **Explorer**, **Participant**, **Contributor**, and **Committer**, based on events they trigger in open-source repositories. The four roles are defined in the table below. + +
Table 7.5 Four Roles of Developer +
+
+ +| Roles | Definitions | Meaning | +| -------------------------------------------- | ----------------------------------------------------------------- | ---------------------------------------------------- | +| Explorer | Users who star a project | Indicates the user has some interest in the project | +| Participant | Users who have made an Issue or Comment on a project | Indicates user participation in the project | +| Contributor | Users with Pull Requests (PRs) for a project | Indicates that the user has contributed to the project's code base | +| Committer | Users participating in PR-review or merge | Indicates that the user has contributed deeply to the project | + +The figure below shows the four cascaded and structured roles. Using the defined role structure, we evaluate the top 10 projects in the OpenRank rankings of GitHub-wide projects from three perspectives: number of roles, time change, and developer role evolution. This is based on the project ranking list in Part II. + +![7-16.png](/image/data/chapter_7/7-16.png) + +
Figure 7.16 Developer Roles and Relationships
+
+ +#### 7.3.1 Distribution of roles + +
Table 7.6 Distribution of the number of developer roles for the top 10 projects in the OpenRank rankings +
+
+ +| Repository name | Explorer | Participant | Contributor | Committer | +| ---------------------------------------- | -------- | ------------ | ------------ | --------- | +| NixOS/nixpkgs | 6244 | 3381 | 3074 | 2638 | +| home-assistant/core | 17777 | 9116 | 1230 | 905 | +| microsoft/vscode | 20113 | 16027 | 525 | 339 | +| MicrosoftDocs/azure-docs | 8939 | 2282 | 1591 | 610 | +| pytorch/pytorch | 13237 | 6391 | 1230 | 685 | +| godotengine/godot | 23426 | 7203 | 1020 | 569 | +| flutter/flutter | 14056 | 11101 | 637 | 334 | +| odoo/odoo | 5078 | 1841 | 930 | 570 | +| digitalinnovationone/dio-lab-open-source | 3619 | 907 | 504 | 40 | +| microsoft/winget-pkgs | 1852 | 1395 | 1384 | 286 | + +
+ +![7-17.png](/image/data/chapter_7/7-17.png) + +
Figure 7.17 Developer Role Distribution Map
+
+ +It can be seen that: + +- Based on the number of explorers, the three most popular projects are godotengine/godot, microsoft/vscode, and home-assistant/core, suggesting they have received widespread attention and support; +- microsoft/vscode is the project with the largest gap between the number of participants and contributors, while microsoft/winget-pkgs has the smallest gap between the two; +- NixOS/nixpkgs has the highest number of committers at 2,638 compared to other projects. In contrast, the digitalinnovationone/dio-lab-open-source project has the lowest number of committers. + +#### 7.3.2 New additions to roles in 2023 + +A user is counted as a valid addition to role X (e.g., the contributor or committer role) if they were not in role X before 2023 and take on that role in 2023. + +For example, if A submits a PR to Project B in 2021 (but never participates in the Code Review process), and A reviews a PR in Project B in 2023, A is a new committer. + +The details of the roles added are shown in the graph below and the table below. + +![7-18.png](/image/data/chapter_7/7-18.png) + +
Figure 7.18 Map of new roles in the open source community in 2023
+
+ +
Table 7.7 Distribution of the number of new developer roles for the top 10 projects in the OpenRank rankings +
+
+ +| Repository name | New Committer | New Contributor | New Participant | New Explorer | +| ---------------------------------------- | ------------- | --------------- | --------------- | ------------ | +| NixOS/nixpkgs | 1226 | 1622 | 1591 | 3027 | +| home-assistant/core | 538 | 808 | 4640 | 8998 | +| microsoft/vscode | 263 | 394 | 10216 | 15746 | +| MicrosoftDocs/azure-docs | 352 | 1420 | 3913 | 1579 | +| pytorch/pytorch | 391 | 802 | 2083 | 13016 | +| godotengine/godot | 386 | 708 | 2834 | 22996 | +| flutter/flutter | 184 | 455 | 3954 | 13579 | +| odoo/odoo | 244 | 453 | 472 | 4991 | +| digitalinnovationone/dio-lab-open-source | 40 | 3611 | 732 | 504 | +| microsoft/winget-pkgs | 231 | 957 | 485 | 1373 | + +The results showed: + +- The repository godotengine/godot received the highest number of new stars, 22,996, with half added in September 2023 due to game developers seeking open-source alternatives after Unity's new pricing policy. Meanwhile, digitalinnovationone/dio-lab-open-source and microsoft/winget-pkgs received the fewest new stars, 504 and 1,373, respectively; +- The repository with the highest number of new participants was microsoft/vscode with 10,216; digitalinnovationone/dio-lab-open-source had the fewest new Issues with 732; +- The repository with the highest number of new contributors was NixOS/nixpkgs with 1,622; +- The repository with the highest number of new committers was also NixOS/nixpkgs with 1,226. + +#### 7.3.3 Perspectives on Developer Evolution + +Developer evolution is measured as the number of developers in an open-source community who move from one role to another. This report only counts developers who have moved from one role to a deeper one. For example, a user who was only a participant before 2023 changes from a participant to a contributor in 2023 when they make their first PR. + +![7-19.png](/image/data/chapter_7/7-25.png) + +
Figure 7.19 Developer Role Evolution Diagram
+
+ +
Table 7.8 Distribution of the number of role conversions for the top 10 OpenRank projects +
+
+ +| Repository name | Contributor -> Committer | Participant -> Contributor | Explorer -> Participant | +| :--------------------------------------: | :----------------------: | :-----------------------: | :---------------------: | +| NixOS/nixpkgs | 254 | 122 | 168 | +| home-assistant/core | 70 | 113 | 134 | +| microsoft/vscode | 16 | 70 | 287 | +| MicrosoftDocs/azure-docs | 129 | 169 | 21 | +| pytorch/pytorch | 60 | 53 | 187 | +| godotengine/godot | 63 | 131 | 330 | +| flutter/flutter | 31 | 91 | 419 | +| odoo/odoo | 55 | 19 | 32 | +| digitalinnovationone/dio-lab-open-source | 0 | 0 | 0 | +| microsoft/winget-pkgs | 49 | 11 | 18 | + +The results showed: + +- Across communities, we can observe the typical funnel model of an evolutionary path from explorers to participants to contributors and committers. In godotengine/godot, for example, 330 explorers evolved into participants, 131 participants became contributors, while 63 contributors successfully evolved into committers. This trend was also observed in other communities and is consistent with the general evolution of community members from initial exploration to deeper involvement. +- In some communities, such as NixOS/nixpkgs, we observed many contributors evolving into committers. In this community, 254 contributors successfully evolved into committers, which may represent a relatively high demand for code review. This may encourage more contributors to become deeply involved in maintenance, which may help improve the quality and stability of the community's code. +- In some communities, such as flutter/flutter and godotengine/godot, we observed a relatively high number of successful conversions of explorers into participants. In flutter/flutter, 419 explorers evolved into participants, while in godotengine/godot, 330 explorers turned into participants. +- The digitalinnovationone/dio-lab-open-source project has no data since it was created in 2023. 
+ +### 7.4 Robot account analysis + +Robotic (bot) automation is a significant contributor to open-source collaboration platforms. This section analyzes nearly 600 million repository events across 7.7 million open-source repositories and over 1,200 bot accounts for 2023. + +#### 7.4.1 Analysis of active data of robots + +
+ 7-21 + 7-20 +
+ +
Figure 7.20 Trend in number of robot events (left) & percentage of robot events in 2023 (right)
+
+ +Analyzing the robotics activity data from 2015 to 2023, some of the observations are as follows: + +Since 2019, the number of bot events has increased significantly, rising from 4,217,635 to 304,257,084. This surge in bot account activity on GitHub can be attributed to the widespread adoption and advancement of GitHub's automation, continuous integration, and continuous deployment (CI/CD) tools between 2019 and 2021. + +Despite the small number of bot accounts, each bot serves multiple repositories, demonstrating efficiency and broad reach. + +#### 7.4.2 Analysis of event types for robots + +![7-22.png](/image/data/chapter_7/7-22.png) + +
Figure 7.21 Difference in number and annual growth rate (%) of GitHub event counts (2022 vs 2023)
+
+ +This graph shows the change in the number of GitHub events by type and their growth rate between 2022 and 2023. By comparing the data from these two years, we can gain insight into the trend of bot account usage in the development process: + +- Dominance of Code Push: PushEvent dominates bot account activity, with a significant rise in volume especially in 2023, suggesting that bot accounts play an important role in code maintenance and updates; +- Changes in project creation activity: CreateEvent is very active in 2022, but declines in 2023, which may indicate a decline in bot account activity in creating new projects; +- Importance of code review and collaboration: PullRequestEvent and IssueCommentEvent numbers were higher in both years, showing the active participation of bot accounts in code reviews and issue discussions; +- Changes in activity types: DeleteEvent decreases in 2023 compared to 2022, while ReleaseEvent increases, reflecting the different focus of robotic accounts in project lifecycle management; +- Increase in annotation-related events: CommitCommentEvent and PullRequestReviewCommentEvent increased in 2023, indicating that bot accounts are becoming more active in the code review process with discussions and feedback; +- Specific uses of bot accounts: less common event types such as GollumEvent, MemberEvent, PublicEvent, and WatchEvent are relatively low in number, suggesting that bot accounts are primarily used for specific automation tasks and are less involved in social interactions. + +#### 7.4.3 Distribution of working hours for robot accounts + +Similar to the developer working hours distribution, we also analyzed the data on the working hours of bot accounts. + +![7-23.png](/image/data/chapter_7/7-23.png) + +
Figure 7.22 Distribution of robot account working hours
+
+ +- The working hours of bot accounts are mainly concentrated between 00:00 and 01:00 and between 12:00 and 13:00; +- Based on the global developer time zones, it can be surmised that most automated processes are more active in the early morning and midday hours; +- Bot activity shows little difference between workdays and non-workdays: most automated collaborative tasks are scheduled, and fewer are triggered in response to a contributor's event. + +#### 7.4.4 GitHub's top list of events for collaborative bots + +![7-24.png](/image/data/chapter_7/7-24.png) + +
Figure 7.23 2023 GitHub's top list of events for collaborative bots
+
+ +## 8. Case Studies + +### 8.1 openEuler Community Case Study + +In 2023, the OpenDigger community integrated Gitee data for the first time, allowing Gitee projects to participate in OpenRank calculations. The openEuler community surpassed PaddlePaddle in the same year, achieving an OpenRank value of 16,728. This made it the second largest open source community in China, after OpenHarmony. + +In 2023, the openEuler community attracted 3,941 developers to collaborate on Issues or PRs, with 1,934 contributors successfully contributing and merging at least one PR to the openEuler community's repository. + +It's worth noting that the openEuler community started a document bug hunt in early 2023. They also integrated an interactive page contribution mechanism with Gitee on the community's official document website. This feature enables developers to correct any errors they find while reading the documents directly on the official website. With just a single click, they can launch Gitee lightweight pull requests (PRs), without having to jump to the Gitee platform or perform Git operations. + +The data change from this innovative mechanism is impressive. In 2023, the openeuler/docs repository merged 7,764 PRs, 74% of which were submitted directly through the official web page. The launch of this mechanism also significantly increased the average number of active contributors per month (from 30 to 80), and the average number of PRs merged per month (from 116 to 722). + +One noteworthy project is openeuler/mugen, which is a highly active testing framework project within the openEuler community. In 2023, 138 developers participated in discussions and contributed to the project, 95 of whom successfully had a PR merged. The project has the third-highest OpenRank within the openEuler community, after the openeuler/docs documentation repository and the openeuler/kernel kernel repository. 
This excellent testing framework enables developers to quickly write and test cases to verify the correctness and validity of their contributions, significantly reducing the cost of subsequent contributions. + +To summarize, the openEuler community has achieved a high OpenRank value thanks to its effective contribution mechanism and testing framework. The community has designed an interactive system that allows for easy documentation contribution with minimal costs. Moreover, contributors can quickly verify the accuracy of their code through a reliable testing framework. These developer experience optimizations are excellent examples for other open-source communities to follow and implement. + +### 8.2 List of top repositories contributed by Chinese developers + +We analyzed how Chinese developers contributed to the top 30 repositories in the OpenRank ranking list for 2023 using data from almost 10 million GitHub developer accounts, including nearly 200,000 from China: + +![8-1.png](/image/data/chapter_8/8-1.png) + +
Figure 8.1 Top 30 Contributed Repositories by Chinese Developers on GitHub
+
+ +Most of the projects are also represented in the overall OpenRank list; the more interesting ones include: + +- [NixOS/nixpkgs](https://github.com/NixOS/nixpkgs): It's also a top international project, a package management tool for a new operating system. While most of the updates are package information updates, this also means that the ecosystem of that operating system itself is thriving. + +- [intel-analytics/BigDL](https://github.com/intel-analytics/BigDL): a runtime repository, created in 2017, for running LLMs on the Intel XPU. However, it became nearly obsolete by the end of 2021. Surprisingly, it made a comeback with the rise of LLMs in 2022 and now maintains an active community of around 50 people per month. + +
+ 8-2 +
+ +
Figure 8.2 BigDL OpenRank Trend Chart +
+
+ +> Screenshot above from [HyperCRX](https://github.com/hypertrons/hypertrons-crx) + +- [siyuan-note/siyuan](https://github.com/siyuan-note/siyuan): Siyuan Notes, a privacy-first domestic open source knowledge management tool, supports bidirectional knowledge block-level references and maintains an active community size of one hundred people per month. Supports subscription commercialisation at a very affordable price. + +- [baidu/amis](https://github.com/baidu/amis): an open-source low-code page generation framework developed by Baidu. In recent years, low-code projects have gained immense popularity, such as Ali's open-source LowcodeEngine, Harmony ecosystem family's DevEco Studio, etc. These projects have provided great convenience for developers to rapidly develop applications using low-code. + +- [cocos/cocos-engine](https://github.com/cocos/cocos-engine): the leading domestic game engine. With the rise of the metaverse concept, godot and other game engines have become important top open source projects worldwide, and the domestic game engine cocos/cocos-engine also performs excellently in China. + +- [MaaAssistantArknights/MaaAssistantArknights](https://github.com/MaaAssistantArknights/MaaAssistantArknights): This is a fascinating project aimed at automating daily quests for the game Arknights using a script assistant. The automation can be achieved through a mobile phone emulator. The project is community-maintained, open source, free, and supports all desktop platforms. It has received over 10,000 stars and has more than 300 active contributors every month, which is fantastic. + +![8-3.png](/image/data/chapter_8/8-3.png) + +
Figure 8.3 MaaAssistantArknights Project Screenshot +
+
diff --git a/en/index.md b/en/index.md new file mode 100644 index 0000000..a3a225f --- /dev/null +++ b/en/index.md @@ -0,0 +1,284 @@ +--- +# https://vitepress.dev/reference/default-theme-home-page +layout: home + +hero: + name: "2023 China Open Source Annual Report" + text: "" + tagline: Kaiyuanshe collaborates with open-source communities and organizations to publish an annual report on global and China's open-source trends. The report provides valuable insights into the latest developments in the dynamic open-source field. + + actions: + - theme: brand + text: Read 2023 Annual Report Immediately + link: /en/preface + - theme: alt + text: Previous Reports + link: https://kaiyuanshe.feishu.cn/wiki/wikcnUDeVll6PNzw900yPV71Sxd + +features: + - icon: + src: "/image/home/KaiYuanShe-logo.png" + width: 40 + height: 40 + title: KAIYUANSHE + details: KAIYUANSHE is a non-profit, vendor-neutral, open-source community formed in 2014. It comprises individual volunteers who contribute towards the cause of open source. The community envisions being "rooted in China, contributing globally, and promoting open-source as a way of life in the new era." Its mission is to achieve "open-source governance, global connection, community development, and project incubation." Its community governance principles are to practice "Contribution, Consensus, and Collegiality." The community's goal is to create a healthy and sustainable open-source ecosystem. + link: https://kaiyuanshe.cn/ + linkText: website + - icon: + src: "/image/home/yunqi_partnets_logo.jpg" + width: 40 + height: 40 + title: Yunqi Partners + details: Yunqi is a research-based venture capital firm founded in 2014 in China. 
Its investment focuses on technology innovation and industry empowerment, covering various areas such as advanced manufacturing, enterprise software, cutting-edge technology, and industrial supply chain technology.Yunqi has been consistently ranked among China's Top 10 Best Early Stage Investment Firms by Zero2IPO, China Venture, and 36Kr. As an early-stage lead investor, Yunqi has invested in over 170 startups, out of which 30 have emerged as industry leaders, including Qifu Technology (NASDAQ:QFIN), Intco Medical (SZ:300677), Intco Recycling (SH:688087), Kujiale, Baibu, Deeproute.ai, MiniMax, KEENON Robotics, XTransfer, Worldwide Logistics, and Takfung. Besides, Yunqi also collaborates in co-creating the open-source ecosystem and has led investments in PingCAP, Zilliz, Jina AI, RisingWave, TabbyML, and several other open-source firms. Along with KAIYUANSHE, it has produced the open-source commercialization chapter of the China Open Source Annual Report in 2021, 2022, and 2023. + link: https://www.yunqi.vc/ + linkText: website + - icon: + src: "/image/home/x_lab2017_logo.jpg" + width: 40 + height: 40 + title: X-lab + details: X-lab Open Lab is a community dedicated to open-source research and innovation. It comprises experts, scholars, and engineers from domestic and international universities, startups, and various Internet and IT companies. They focus on open innovation in the open-source software industry and come from diverse professional backgrounds, including computer science, software engineering, data science, business administration, sociology, economics, and other interdisciplinary fields. They have been practicing open source strategy, open source measurement, open source digital ecosystem, and other related topics for a long time. 
The group has significantly contributed to open-source governance standard development, open-source community behavior metrics and analysis, open-source community process automation, and open-source domain-wide data governance and insights. + link: https://github.com/X-lab2017 + linkText: GitHub +--- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Xue Guo, Willem Jiang, Tao Jiang, tison, Jianfan Wei, Jack Yu

+ + + + + +
+ + +
diff --git a/en/open-source-milestones.md b/en/open-source-milestones.md new file mode 100644 index 0000000..87558eb --- /dev/null +++ b/en/open-source-milestones.md @@ -0,0 +1,712 @@ +--- +outline: deep +--- +# OSS Chronicle + +## Overview + +Why do we include a considerable amount of international open-source news in the Open Source Chronicle section of the China Open Source Annual Report? These are the significant events that Chinese open-source enthusiasts must be aware of, and they are the crucial events that impact China's open-source community or will do so in the future. + +The Open Source Chronicle reflects the foremost open-source events of 2023 that have captured the attention of editorial volunteers from diverse backgrounds. The overarching theme underpinning our thought process is exploring open-source technologies' vast potential and accompanying benefits for a wide range of stakeholders. As editorial volunteers, we are committed to ensuring that our coverage of this landmark event is comprehensive, objective, and informative, enabling our readers to understand better the latest trends and developments in the open-source domain. + +* Disruptive innovations in global "**Open-Source Technologies**" such as artificial intelligence and machine learning, are the main theme throughout the Chronicle; +* Global conflicts resulting from geopolitical dynamics indirectly impact "**Open-Source Ecology**", regardless of East vs. 
West; +* This has resulted in a shift towards "**Open-Source Governance**" in all areas, including regions, law, trade, and communities; +* Meanwhile, "**Open Source Security**" is considered a top priority; +* The growth of "**Open Source Commercialization**" is a promising trend, and though 2023 may pose some challenges, it's encouraging to know that there is an abundance of open-source startups thriving worldwide, including in China; +* In today's world, technology, ecology, governance, and commercialization are undergoing significant changes. This has made "**Open Source Education**" a crucial foundation for exploring new possibilities. Artificial intelligence is a prime example of disruptive innovation that requires persistent research and a robust higher education system to achieve its current level of success. +* The last part, "**Open Source Ranklists, Papers and Reports**", is like a delightful dessert after dinner. It will be fascinating to observe if it provides valuable insights and accurately predicts the future of open-source development in China. We can only know for sure by the end of 2024. + + +This year, AI is present in all categories. A holistic approach is necessary for full comprehension. + +In brief, we stand on the brink of a world where AI will transform the way things work. We hope to meet you at next year's Open Source Chronicle! + +## 1. Open Source Technology Chronicle + +### 1.1 Artificial Intelligence and Large Models + +- **ZHIPU AI - GLM** +ZHIPU AI has open-sourced the ChatGLM-6B series. ChatGLM-6B is an open-source dialogue language model that supports bilingual Q&A. In addition, ZHIPU AI has open-sourced VisualGLM-6B (CogVLM), a multi-modal dialogue model, which combines the capabilities of image processing and natural language processing to support both Chinese and English dialogues, aiming to provide a richer and more intuitive interactive experience. 
+- **Baichuan** +Over the past year, Baichuan has released several versions of large models, including Baichuan-7B. Later, they launched the 13B model and the Baichuan2 series of models, and made the base and chat versions open-source. One of the latest models, Baichuan2-192K, has a large size and a context window length of 192K. +- **Intern general large model system** +Shanghai Artificial Intelligence Laboratory (AIL) released the newly upgraded "Intern General Large Model System", which comprises three basic models (Intern Multimodal - Large Model, InternLM - Large Language Model and InternLandMark - Large-scale 3D Neural Radiance Field), as well as the first Full-Chain Open Source System for the research, development and application of large models. +- **Alibaba - Qwen** +Alibaba open-sourced the 7B model of Tongyi QianWen (Qwen), and then successively open-sourced the base and chat models of 1.8B, 14B, and 72B, and provided the corresponding int4 and int8 quantised versions. In the multimodal scenarios, QianWen also open-sourced the two multimodal models of vision and speech, qwen-vl and qwen-audio. +- **Kunlun - Skywork** +Kunlun Inc. released the Skywork-13B series of its "Skywork" large language models at the ten-billion-parameter scale and open-sourced Skypile/Chinese-Web-Text-150B, a large, high-quality open-source Chinese dataset of 600GB and 150B tokens. +- **RWKV** +RWKV has been continuously open-sourced since its release as a large language model with a non-Transformer architecture. In 2023, RWKV released multiple versions and entered LF AI & Data for incubation. +- **Inspur - Yuan 2.0** +Inspur Electronic Information Industry Co., Ltd. officially released "Yuan 2.0", a 100-billion-parameter base model. This series of models is fully open-sourced and commercially available, including three versions with parameter counts of 102B (102.6 billion), 51B (51.8 billion), and 2B (2.1 billion). Compared with Yuan 1.0, Yuan 2.0 has improved programming, reasoning, and logic. 
+- **01.AI - Yi** +In November 2023, 01.AI released the Yi series of models with parameter sizes between 6 and 34 billion and 30 billion tokens of training data. +- **High-Flyer Quant - DeepSeek** +DeepSeek, a division of High-Flyer Quant, has released its 67B open-source large model. DeepSeek has open-sourced the 7B and 67B scale models, which contain a base model (base) and an instruction tuning model (chat). No application is required, and it is free for commercial use. At the same time, the project team has also opened nine model checkpoints in the middle of training for download. +- **Ant Group - CodeFuse** +Ant Group has open-sourced CodeFuse-13B and CodeFuse-CodeLlama-34B for CodeLlama, which currently supports a variety of code-related tasks such as code completion, text-to-code, and unit test generation. The open-source release includes the MFT (Multi-Task Fine-Tuning) framework, a dataset for enhancing the coding capabilities of LLMs, and a deployment framework. + +- **Meta Llama 2** +In July 2023, Meta announced the Llama 2 project and disclosed that they had successfully open-sourced three pre-trained models at different scales, which included the 7B, 13B, and 70B parameter versions. These models were trained on a massive 2 trillion token scale during the pre-training phase. In the Supervised Fine-Tuning (SFT) phase, they were fine-tuned with over 100,000 pieces of data to improve their performance on specific tasks. Additionally, Meta made the Llama2-Chat model open-source, which is SFT-optimized based on conversation data. Furthermore, Meta is continuing to open-source the CodeLlama programming language large model. +- **Mixtral 8x7B** +In December 2023, Mistral AI open-sourced the Mixture of Experts (MoE) model Mixtral 8x7B, commercially available under the Apache 2.0 license. 
Mixtral-8x7B is a Mixture of Experts model consisting of eight expert networks with 7 billion parameters each, a structure that improves the model's efficiency in processing information and reduces operating costs. +- **Falcon 180B** +Falcon 180B is an open source large language model released by the Technology Innovation Institute (TII). The model has 180 billion parameters and was trained using TII's RefinedWeb dataset. +- **Arabic AI Large Models Jais Open Sourced** +A team of UAE researchers has announced the open-sourcing of the Arabic large model Jais. Jais is a bilingual Arabic-English large language model pre-trained with 13 billion parameters. +- **Microsoft open-sourced visual foundation model Visual ChatGPT** +Microsoft has launched Visual ChatGPT, an open-source project that combines OpenAI's ChatGPT with a series of Visual Foundation Models (VFMs) to enable users to send and receive images during chats. The project aims to extend the functionality of ChatGPT so that it can not only process text but also understand and generate images, thus enabling a multimodal interactive experience. +- **NVIDIA officially open sourced TensorRT-LLM** +NVIDIA has officially released an optimized open-source library called TensorRT-LLM. This library helps to speed up the performance of large language models on AI GPUs such as Hopper. In order to test the performance, NVIDIA compared H100 with TensorRT-LLM-enabled H100, both measured against A100 as the baseline. The results showed that in GPT-J 6B inference, the performance of H100 was 4 times better than A100, while the performance of TensorRT-LLM-enabled H100 was 8 times better than A100. +- **Elon Musk drives the efforts of X (formerly Twitter) to open source its recommendation algorithm** +X (Twitter) has released two repositories on GitHub (main repo, ml repo) that cover much of the Twitter source code, including the recommendation algorithms and the mechanisms used to control the tweets users see on the For You timeline. 
+- **Hugging Face changes its Text Generation Inference (TGI) licence** +Hugging Face has announced that in the latest release of TGI v1.0, its open-source license will change from Apache 2.0 to HFOIL 1.0. HFOIL stands for Hugging Face Optimized Inference License, which is HuggingFace's specifically designed license agreement for optimized inference solutions. +- **Hugging Face has open sourced Rust-based machine learning framework Candle** +Hugging Face recently open-sourced Candle, a novel and small Rust ML framework that runs extremely fast and supports a wide range of powerful models. It provides support for GPUs and has an optimised CPU backend that runs in the browser. Candle also includes several pre-trained models and use cases, such as speech recognition models, generic LLMs, computer vision models, and more. +- **Alibaba has open sourced AnyText** +Alibaba has recently released a multi-language visual text generation and editing model called AnyText. This model allows users to create text that is comparable to that of a professional Photoshop editor. With AnyText, users can customize the location, strength, intensity, and number of text seeds that appear in a picture. +- **Jina AI launches world's first open source 8K Text embedding model** +Jina AI announced the release of the Jina-embeddings-v2 model, an open source product that supports 8K (8,192 tokens) context lengths and is similar in functionality and performance to OpenAI's text-embedding-ada-002. + +### 1.2 Operating Systems and Programming Languages + +- **The Long Term Support (LTS) version of Linux kernel now has 2-year maintenance period instead of 6** +The Linux kernel LTS releases were extended to six years in 2017. Recently, a tweak was made to the policy. Jonathan Corbet of Linux Weekly News said it doesn't make sense to maintain old kernels for so long because they're not used much. 
+- **India's Ministry of Defence develops its own Linux distribution, Maya OS, to fully replace Windows** +India's Ministry of Defence has announced a significant overhaul of its cybersecurity system. It plans to replace the Windows operating system with a Linux distribution called Maya in all its networked computers. The move is in response to the growing threat of malware and ransomware attacks. It aims at promoting independent innovation and reducing dependence on foreign software. +- **Red Hat Announces CentOS 7 and RHEL 7 end of support on 30 June 2024** +Red Hat has recently announced the discontinuation of support for CentOS 7 and RHEL 7. In addition, the complete source code for RHEL will no longer be publicly available. To maintain compatibility and support, downstream distributions of RHEL (such as CentOS, Rocky Linux, AlmaLinux, etc.) will need to recompile and release their versions within 30 days. +
However, it is important to note that Red Hat has assured the CentOS community that it will not be going away. Community contributors and CentOS users will still be able to collaborate on open-source Linux distributions that are part of the CentOS Stream project. +- **Google's open-source browser project Chromium announces the use of Rust** +Google has posted a blog post announcing that it will support using third-party Rust libraries from C++ in Chromium, with plans to include Rust code in Chrome binaries by the end of the year. It also said that Rust, a Mozilla-developed programming language that offers security along with high performance, was initially designed to be used for writing browsers, so it's only fitting that open-source browsers like Chromium rely on the technology. +- **Open-source operating system openKylin 1.0 officially released, already supports Arm, RISC-V** +openKylin has supported Arm and RISC-V since version 0.9. Additionally, the new openKylin 1.0 version comes with 6.1+5.15 dual kernels by default, along with independent selection and upgrade of 20+ operating system core components. Version 1.0 also adds many new features and fixes more than a thousand bugs, improving the overall stability and compatibility of the system to provide users with a better experience. +- **Huawei officially releases HarmonyOS 4** +Huawei officially released the HarmonyOS 4 operating system. The new HarmonyOS 4 is said to have breakthroughs in privacy and security, AI large model capability, and personalized interaction. +- **fit2cloud open sourced 1Panel** +1Panel is a modern, open-source Linux server operation and management panel that provides users with accessible server-building and management resource services. +- **AWS open sourced domain-specific language Cedar** +AWS has released Cedar as open source. Cedar is a domain-specific language that enables defining policy access permissions. 
It is integrated into Amazon Verified Permissions and AWS Verified Access. Cedar can also be integrated into applications through SDKs and language specifications.
+Cedar allows defining access policies separately from the application code, which facilitates writing, analyzing, and auditing the policies independently. Cedar supports both Role-Based Access Control (RBAC) and Attribute-Based Access Control (ABAC). +- **Microsoft releases Guidance language** +Microsoft has introduced a domain-specific language called Guidance, designed to enhance developers' ability to manage contemporary language models. The new framework integrates generation, prompting, and logic control into a unified development process. The programming language enables developers to 'organize generation, prompting and logic control into a continuous flow that matches how the language model processes text.'
+It integrates seamlessly with providers such as the Hugging Face model. It combines an intelligent seed-based generation caching system and token healing to optimize prompt boundaries and remove bias in the lexical slicing process. + + +### 1.3 Hardware Technology and the Internet of Things + +- **China supports the building of humanoid robot open source communities** + + In October 2023, China's Ministry of Industry and Information Technology (MIIT) released the Guiding Opinions on the Innovative Development of Humanoid Robots. The document proposed the establishment of an open-source community for humanoid robots, which would promote the development of open-source foundations, provide support for key enterprise open-source projects, and encourage collaboration and innovation among developers around the world. + +- **Stanford University unveiled Mobile ALOHA, an open source robot** + + In March 2023, Stanford University unveiled Mobile ALOHA (A Low-cost Open-source Hardware System), an open-source robot that can perform fine tasks via teleoperation, and by the end of 2023 is ready to be autonomously operated for simple tasks through joint training. + +- **Tesla Open Sourced Roadster Runner Design and Engineering Details** + + Musk wrote on the social platform that Tesla is “fully open source” for the design and engineering details of its first-generation Roadster and has published research and development documents that are accessible to all. + +- **openKylin officially joined the RISC-V Foundation** + + The openKylin community has recently become a member of the industry consortium of the RISC-V Foundation, with the aim to contribute towards the development of the RISC-V ecosystem. They intend to build an operating system that is in harmony with the hardware and software ecosystem of the RISC-V architecture. 
+ +- **Ali T-Head open-sourced the XuanTie RISC-V family of processors** + + Ali T-Head has made the XuanTie RISC-V series of processors open-source along with a range of tools and system software. This marks the first time a complete open-source stack of processors and necessary software has been made available, which will aid in the advancement of the RISC-V architecture, expedite the integration of RISC-V hardware and software technologies, and facilitate the adoption of innovative solutions. + +- **AMD Open Sourced FSR** + + AMD open-sourced FSR (FidelityFX Super Resolution)3 under the MIT license, an upsampling technology that competes with NVIDIA's DLSS, but unlike DLSS, it doesn't rely on a proprietary CUDA core and is software-based. + +- **Baidu open sourced its messaging middleware BifroMQ** + + China's Baidu has open-sourced BifroMQ, its high-performance and distributed messaging middleware. BifroMQ uses serverless architecture and has native multi-tenancy support. Developed over years by Baidu's IoT team, it facilitates IoT device connectivity and messaging systems at scale. + +### 1.4 Data Infra + +- **DragonflyDB 1.0** + + DragonflyDB is a modern open-source in-memory database that is compatible with the Redis and Memcached APIs. It is a viable alternative to both, as it requires no code changes during migration. The development team recently released DragonflyDB version 1.0, stating that it is ready for production use. DragonflyDB 1.0 supports the most common data types and commands of Redis, as well as snapshots, master-slave replication, high availability, and other features. + +- **FerretDB 1.0 officially released** + + FerretDB 1.0, an open source alternative to MongoDB, has been released. FerretDB wants to bring MongoDB database workloads back to their open source roots, enabling PostgreSQL and other database backends to run MongoDB workloads, preserving the opportunities offered by the existing MongoDB ecosystem. 
+ +- **Apache Doris Version 2.0.0 Released** + + Apache Doris version 2.0.0 was officially released on August 11, 2023. Over 275 contributors submitted more than 4,100 optimizations and fixes, resulting in significant improvements. Specifically, the blind query performance on the standard Benchmark dataset has improved by over 10x in version 2.0.0 of Apache Doris. + +- **Apache SeaTunnel Graduated into Apache Top Level Project** + + Apache SeaTunnel is the first Chinese-led top-level project in big data integration to be contributed to the ASF. Apache SeaTunnel, formerly known as Waterdrop, changed its name to SeaTunnel in October 2021 and applied to join the Apache Incubator. SeaTunnel is an easy-to-use, ultra-high-performance, distributed data integration platform that supports real-time synchronization of massive amounts of data. + +- **Aliyun's open-source graph computation engine GraphScope performance tops authoritative lists** + + GraphScope, an open-source graph computing engine developed by Aliyun, has set a new record on the internationally authoritative "LDBC SNB Interactive" graph benchmark list. It achieved a throughput rate of over 30,000 QPS for a single node executing graph database queries, which is twice the performance of the previous record holder. + +- **Baidu open-sourced its high-performance search engine Puck** + + Baidu announced that it has open-sourced its self-developed search engine, Puck, under the Apache 2.0 license, the first open-source vector search engine for large data sets in China. 
+ +- **ByteDance Open Sourced ByConity** + + ByteHouse has recently released its kernel to the community as ByConity, under the Apache 2.0 license agreement. + + ByConity is an open-source cloud-native data warehouse that is based on the ClickHouse kernel but comes with a new storage-computation separation architecture. It supports several essential features such as tenant resource isolation, elastic scaling up and down, storage-computation separation, and strong consistency between data reading and writing. ByConity aims to provide a reliable and scalable solution for data storage and computation in a cloud-native environment. + +- **Ali open sourced multi-database client tool Chat2DB** + + Chat2DB is an open-source and free multi-database client tool that supports local installation on Windows and Mac, server-side deployment, and web page access. Compared with traditional database client software such as Navicat and DBeaver, Chat2DB integrates AIGC's ability to convert natural language to SQL and SQL to natural language and can give developers SQL optimization suggestions. + +- **ApeCloud open sourced KubeBlocks** + + KubeBlocks is an open-source system for managing and running data infrastructure on K8s. It helps developers, platform engineers, and SREs deploy and maintain a dedicated DBPaaS in the enterprise across various public and private cloud environments. KubeBlocks is the only open-source multi-engine data/database management system project in CNCF Cloud Native LANDSCAPE, supporting 32 databases, such as MySQL, PG, MongoDB, Redis, Kafka, Pulsar, and more. + +### 1.5 Cloud Computing and Infrastructure Software + +- **DragGAN Got 20,000 Stars One Day After Being Open Sourced** + + DragGAN is an image editing tool that was developed by Google researchers in collaboration with the Max Planck Institute for Informatics and MIT CSAIL. 
It allows users to easily adjust the position, pose, expression, size, and angle of the subject in a photo by manipulating the pixel points and orientation in the image. This intuitive tool is designed to make image editing quick and effortless. + +- **LLMOps platform Dify.AI code is completely open-source** + + Dify.AI, the LLMOps platform, has announced that 46,558 lines of code are completely open-source and has temporarily decided to relax the open-source protocol from AGPL to Apache 2.0. + +- **Huawei open-sourced cross-end, cross-framework, cross-version enterprise application front-end component library OpenTiny and high-performance service grid Kmesh** + + OpenTiny is a development kit for building web application front-ends using Vue2/Vue3/Angular. It includes a theme configuration system, back-end templates, a CLI command line, and other tool libraries. + + Kmesh's high-performance service grid offers developers a new level of grid performance through innovative architecture. It leverages eBPF+ programmable kernel technology to achieve OS-native service grid data plane capabilities. Traffic governance is integrated into the OS, which significantly enhances the accessibility of grid services and improves their access performance. + +- **Baidu Intelligent Cloud Releases Open Source QianFan SDK Version** + + Baidu Intelligent Cloud officially released the Python SDK (QianFan SDK) version, which is fully open source and available for free download and use by enterprises and developers. + +- **Volcano Engine Self-developed Universal Multimedia Processing Framework BMF** + + Volcano engine is an open source, cross-language multimedia processing framework that offers flexibility and scalability. It includes the BMF (Babit Multimedia Framework), an eight-bit multimedia processing framework, and provides a simple and easy-to-use interface. 
+ + BMF dynamically manages and reuses video processing capabilities in a modular way and builds high-performance multimedia processing links using Graph, enabling efficient project production for multimedia users. + +- **ByteDance Released and Open Sourced Rspack** + + Rspack is a Rust-based bundler that has completed support for the Webpack Loader architecture. It is incubated by the ByteDance Web Infra team and offers high performance, customizability, and compatibility with the Webpack ecosystem. + +## 2. Open Source Ecology Chronicle + +An interesting phenomenon is that if something good happens in the open source community, it should mostly be written in the business chapter. And if something bad happens, it can mostly be filed in the ecology chapter. Of course, it's not just bad things; there's also some good news, as well as policies in various countries that can have a profound impact on the open source ecosystem. + +### 2.1 Leading enterprises are laying off open source workforce + +From the beginning of January, there have been rumours of layoffs at Google, GitHub and GitLab, and even companies like Red Hat are also laying off staff, and then there is news of layoffs vaguely disclosed by various large domestic companies. Although this chronicle focuses on the open source ecosystem and the situation of open source people, objectively speaking, it is true that the big companies are not specifically trying to lay off open source talent. It's just that once the layoffs start, the open source people within the company will look "suspicious" and will be pressed with the question: what value have you really created for the company? And that's a question that's never easy to answer in a serious, positive way! + +### 2.2 Famous open source gurus struggle to make ends meet + +The next piece of news is even more sobering. The 12,000 people Google laid off, dubbed the "Golden 12K", included some famous open source bigwigs. 
Examples include Chris DiBona, who founded Google's OSPO 19 years ago, and Samba co-founder Jeremy Allison, 61, who reluctantly tweeted, "Just got fired from Google. If anyone needs SMB 1/2/3 protocol or open source experience, I'd be interested". + +There are also some famous open source people who have suffered even worse. Let's briefly list the news titles: + +- "The author of the open source framework NanUI turned to selling steel, and the development of the project was suspended" +- "10 months in prison, internet busts, and struggling to earn a living! Behind the 9 billion downloads of an open source project is 9 years of work" +- "Due to lack of funds, the full-time developer confesses: there may be no future for this open-source software!" +- "Unemployed due to mania, author of acclaimed open-source project begs for money online" +- "'The roots of free open source software have collapsed,' complains the head of core-js, who has the entire modern web on his back, but has given up on open source for lack of money" +- "Another popular open source project announces cessation of functionality as funding critically falls short" + +It's really a case of "hear no evil, see no evil". In last year's Open Source Chronicle, we were still talking about the "twilight of individual heroism". Today, the trend has become more and more obvious. + +:::info Expert Review +**Wei Jianfan**: If you approach open source with a playful mindset, that's great; don't worry about the money problem. If your livelihood issues have not been resolved, do not devote yourself fully to open source; treating it as a hobby is fine, because open source itself is not meant for making money. +::: + +### 2.3 Well-known open source projects are ceasing development one after another + +In 2023, a number of notable open source projects, both domestic and international, announced the cessation of development for different reasons. 
+ +The most outrageous one probably has to be AetherSX2, one of the best PlayStation 2 emulators on the Android platform. The developers had no choice but to announce the cessation of development because they suffered "endless impersonations, complaints, unreasonable demands, and even death threats". + +Most heartbreaking of all is aardio, a programming language focused on desktop software development, whose author announced that he no longer had the energy to maintain the project due to his wife's cancer. + +There are also some more common reasons: the developer's company ran short of money or went out of business (Touca, libjpeg-turbo); the developers lost interest or no longer had the energy to maintain the project (Peek, wangEditor, lodash); or the technology became outdated (Mokee). + +### 2.4 40 years' rugged journey of the Free Software Foundation + +On 27 September 1983, Richard Matthew Stallman (RMS) announced the "GNU Project" to develop a Unix-like free software operating system, and in doing so, launched the Free Software Movement. In 2023, the Free Software Foundation also published an article celebrating forty years of GNU and the Free Software Movement. + +FSF Executive Director Zoë Kooyman said that "GNU is not only the most widely used operating system based on free software, but is also at the heart of the philosophy that has guided the free software movement for forty years. We hope that the 40th anniversary will inspire more hackers to join GNU in its goal of creating, improving and sharing free software around the world." + +However, in April 2023, an article was published claiming that after nearly 40 years, the Free Software Foundation (FSF) was dying. The author argued that "the FSF has failed to focus on spreading the free software philosophy, developing, distributing, and promoting copyleft licences, and overseeing the health of the core concepts of the free software movement, while at the same time devoting its resources to other, unproductive tasks". 
+ +In fact, we do talk more about open source software than free software these days. So has the Free Software Movement fulfilled its destiny, or is it likely to be revitalised through reform? + +### 2.5 Ageing of the open source community + +The aging of the open source community is an unavoidable phenomenon. Even the ever-tempered Linus Torvalds has begun to curb his temper and talk about "the aging of the kernel community". The Postgres community is also aging, with the main developer being 68 years old. There's also news of the death of Bram Moolenaar, the father of Vim, and the death of Thien-Thi Nguyen, a contributor to the GNU Free Software Project. What should we think about the phenomenon of "aging"? + +In fact, we should see more young people joining the open source community, but they tend to join younger projects that are more interesting and newer, rather than older projects with a long history. + +Maybe what we should really think about is: do those old open source projects really have to be active and release new versions all the time? + +### 2.6 Some encouraging news on China's open-source efforts + +There's still a lot of good news in China's open source community, such as the official report in April: "The number of China's open source software developers exceeded 8 million". + +In January 2023, Apache Linkis, Apache Kyuubi, Apache bRPC; in February, Apache EventMesh; in June, Apache SeaTunnel, Apache Kvrocks, one after another, officially graduated to become the Apache Software Foundation top-level projects. In February, Jina AI officially donated DocArray to the Linux Foundation, Paralus officially became a sandbox project of the CNCF Foundation, and in July, the Istio project officially graduated from the CNCF. 
+ +openKylin officially joined the RISC-V Foundation, Huawei became China's first PyTorch Foundation Premier member, and Jiang Ning was re-elected as a director of the board of the Apache Software Foundation for the year 2023, all of which show that we are still actively participating in the international open source ecosystem, and are continuing to play an important role in it. + +In February 2023, following ALC (Apache Local Community) Beijing and ALC Shenzhen, ALC also set up a Xi'an chapter. At the same time, KAIYUANSHE also launched the KCC (Kaiyuanshe City Community) programme, which by the end of the year had grown to eleven cities, including Beijing, Changsha, Chengdu, Dalian, Hangzhou, Nanjing, Guangzhou, Shanghai, Shenzhen, Singapore and Silicon Valley. + +In March 2023, following the OpenAtom Open Source Foundation, China's second open source foundation - the CHANCE Foundation - was officially established in Chongqing. Later, it also launched "SigStore China Community", "Open Source Innovation Education Alliance", etc., and now three open source projects have been officially donated to the CHANCE Foundation. We look forward to the establishment of more quality foundations in China, for the world, in the future. 
+ +**2023 open source related conferences / activities** + +- February + - Shenzhen: First OpenHarmony Conference +- March + - Beijing: The 1st OSPO Summit + - Beijing: DevTogether Summit +- April + - Suzhou: Mobile Cloud Conference - Open Source Forum + - Shanghai: openEuler Developer Day +- May + - Shanghai: Global Open Source Technology Summit (GOTC) +- June + - Beijing: BAAI Conference — AI Open Source Forum + - Beijing: OpenAtom Global Open Source Summit + - Beijing: 18th Open Source China Open Source World Summit +- July + - Beijing: China Internet Conference - Open Source Supply Chain Forum + - Taipei: 2023 COSCUP (Conference for Open Source Coders, Users & Promoters) +- August + - Shanghai: World Artificial Intelligence Conference - Open Source Learning Forum + - Beijing: CommunityOverCode Asia 2023 +- September + - Shanghai: KubeCon + CloudNativeCon + Open Source Summit + - Shanghai: GOSIM (Global Open Source Innovation Conference) + - Shanghai: 2023 INCLUSION.Conference on the Bund - Open Source Forum + - Beijing: Open Source Cloud Alliance for Industry (OSCAR) Conference +- October + - Wuhan: CHANCE Foundation Diverse Cooperation Summit + - Changsha: CCF ChinaOSC + - Changsha: 1024 Programmer Festival + - Chengdu: COSCon 8th Annual China Open Source Conference +- December + - Beijing: OpenInfra Days China 2023 + - Sanya: OpenCS (Open-source Computer Systems) 2023 + - Beijing: Operating System Congress & openEuler Summit + - Wuxi: OpenAtom Developer Conference + - Shanghai: Open Source Industry Ecological Conference + +### 2.7 The Impact of national policies on the open source ecosystem + +When it comes to open source ecology, it is necessary to mention the open source-related policies formulated by various countries and regions, all of which will have an all-round impact on the open source community, business and ecology. 
Simply summarised, they can be divided into the following categories: + +- **Government policies to support open source** - as reported in July 2023, a study found that "27% of the UK's total tech value-added comes from open source, valued at £13.59bn", and in China there are a range of policies in place, from the central government to the local level. There is dedicated support for specific open source projects (Shenzhen), targeted funding for specific foundation projects (Beijing), and promotion of the integration of open source technology with specific industries, to name but a few. We will see in the coming years how much impact this will have on the open source industry and ecosystem. +- **The emergence of open source as a weapon in international competition** - whether it is GitHub blocking developer contributions from Russian companies, a US lawmaker proposing to restrict Chinese development in the RISC-V space, or the wide variety of "export-restrictive" policies that have been put in place or attempted, all of this makes the Reuters headline "Open-source software becoming a key part of trade war" seem imminent! +- **Open source security has also seen a lot of policy activity** - the United States, the European Union and China have all introduced a series of bills and regulations related to "open source security" and "AI compliance". This has left the open source community with mixed feelings: happy that the security field is getting more and more attention from governments, but worried that unreasonable policies and regulations may hamper the development of open source technology. + +## 3. Open Source Governance Chronicle +Open-source governance can be divided into three categories: community governance, project governance, and risk governance. Risk governance encompasses different types of risks such as ethical and social risks, legal compliance risks (including licenses), supply chain risks, security risks, and more. 
Given the importance of open-source security, we have included a separate chronology of open-source security events in Section 4 of this article. + +In 2023, a significant breakthrough in the development of Artificial Intelligence (AI) caused widespread debate among experts worldwide. Whether or not to limit the pace of AI development was discussed. At the same time, major geopolitical powers such as the EU, the US, and China focused on creating legislation to regulate AI. Furthermore, open-source technology played a crucial role in catalyzing the development of AI, leading to efforts to define open-source AI. + +During 2023, major open-source foundations and organizations from around the globe held online and offline discussions. They aimed to encourage policymakers and legislators worldwide to work together to face the challenges brought by the new era of AI through open-source cooperation and reject techno-nationalism and geopolitical hostility. Despite their efforts, the fragmented global open-source communities, particularly those from Asia and China, still need to gain significant influence on policymakers. Therefore, more attention and collaboration are necessary to address this issue. + +We have given more importance to the crucial events related to open-source AI governance this year. Due to limited space, several project governance events have been included in the community and risk governance categories and will not be listed separately. + + +### 3.1 Community Governance + +#### 3.1.1 Controversies in the Rust community + +The Rust community underwent a series of crises and governance changes in 2023. Here are some of the major events and outcomes: + +- The Rust programming language team faced internal disagreements and created a new Leadership Council to decentralize authority. External experts were attacked by some core members, causing them to leave and leading to resignations. 
These conflicts led to the announcement of a new programming language called Crab. Crab's developers wanted more support with Rust's design, aiming to be more flexible, efficient, and faithful to Rust's original intent and philosophy. +- The Rust Foundation's new Trademark Policy sparked community opposition over concerns that it could limit Rust's growth and innovation. The Foundation apologized, acknowledged its shortcomings, and promised to revisit and revise the policy while engaging in more dialogue with the community. +- The management of the Rust community faced issues again recently. The organizers of RustConf removed some scheduled keynote speakers without informing them, which led to an outcry and protests within the community. As a result, some well-known Rust developers and speakers decided to withdraw not only from RustConf, but also from the Rust community as a whole. +- Graydon Hoare, the founder of the Rust language, said in an interview that he was helpless and frustrated by the conflict and division in the Rust community, and that he believed that Rust had deviated from his original vision and goals, and that he was no longer able to control and save Rust, and that he hoped that the community would solve the problem on its own and leave him alone. + +While the Rust language went through some community crises and governance changes in 2023, it also published a roadmap for 2024 that focuses on three directions: lowering the barrier to learning, expanding the ecosystem, and improving the development process. + +The design team for the Rust language has stated that their goal is to simplify the program so that developers only have to deal with the inherent complexity of their domain and no longer have to deal with the unintended complexity of Rust, and also to give library authors more power and flexibility to meet the needs and innovations of their users. 
+ +In addition, some observers believe that the Rust language is evolving toward ease of use after proving its stability, performance, and productivity in 2021. Rust will likely see explosive growth as the cost of learning and use decreases even further. The focus on security, concurrency, and performance, and the growing adoption of the language as a language designed not only for today's challenges but also for the challenges of the future, suggests that the Rust language will be here to stay, but that community governance will remain a top priority that must be addressed. + +#### 3.1.2 Controversies in the Red Hat community +Red Hat sparked a storm in the open source world in 2023 involving the source distribution and licensing of its two Linux distributions, RHEL (Red Hat Enterprise Linux) and CentOS (Community Enterprise Operating System). Here are some of the major events and outcomes: + +- Red Hat, a popular software company, recently made an announcement that it will no longer share the complete source code of RHEL (Red Hat Enterprise Linux) publicly. Instead, it will only provide patches and updates. Additionally, downstream distributions of RHEL (like CentOS, Rocky Linux, and AlmaLinux) will need to recompile and release their versions within 30 days to maintain compatibility and support for RHEL. This decision has caused controversy among the open-source community. Many believe that Red Hat's actions go against the principles of open-source software and that the company is prioritizing profits over the spirit of open-source. The decision has also created difficulties and pressure for downstream distributions of RHEL. +- Red Hat has responded to this, stating that they have not broken their commitment to open source, but are rather seeking to protect the brand and quality of RHEL from some bad behavior and abuse, as well as to encourage more users and developers to use RHEL directly and enjoy the services and support it provides. 
+- CentOS, as the most extensive downstream distribution of RHEL, has been hit the hardest. Its ecosystem and community are facing a crisis of fragmentation and decline, and some users and developers have turned to other Linux distributions, such as Debian, Ubuntu, Fedora, etc., believing that CentOS has already lost its meaning and value of existence. +- Both Oracle and SUSE took advantage of the opportunity to mock and provoke Red Hat, stating that they would continue to support and maintain RHEL's downstream distributions and even invested heavily in creating their own RHEL offshoots, such as Oracle Linux and SUSE Linux Enterprise Server, in an attempt to capture RHEL's market and users. +- Red Hat has released a statement once again to explain why they are changing their RHEL source code release strategy. According to the statement, the company is making this change to improve the security, stability, and reliability of RHEL. The change will also promote innovation and development of RHEL. Red Hat assures that they still respect and support the open source community and welcomes more collaboration and feedback from it. + +### 3.2 Risk Governance + +#### 3.2.1 Ethics and social risks +AI technology development and application have triggered several ethical, moral, and societal risk debates and concerns related to human safety, freedom, privacy, and responsibility. The following are some of the significant events and viewpoints: + +- **Over 1,000 tech leaders and researchers, including Elon Musk**, have called for artificial intelligence labs to suspend the development of advanced systems, warning in an open letter that AI tools pose significant risks to society and humanity. Conversely, Hongyi Zhou, CEO of 360, believes that not developing AI is the biggest insecurity. According to him, AI can help humans solve many problems, and its use can be regulated through laws and regulations. 
+- **A 22-word statement signed by nearly 400 experts and scholars in the field of AI**, including Geoffrey Hinton, the godfather of AI, Sam Altman, CEO of OpenAI, and Ilya Sutskever, its Chief Scientist, warns that AI could extinguish the human race! It states: "Mitigating the risk of extinction from AI should be a global priority alongside other societal-scale risks such as pandemics and nuclear war". +- **In July 2023, many open-source foundations and organizations from around the world held an international conference in Geneva** with the aim of exploring the relationship between AI and open-source, including the challenges and opportunities involved. The conference concluded that open source is essential in promoting AI innovation and cooperation, and is an effective means of ensuring AI ethics and social responsibility. Many experts from around the globe pointed out that open source is an inevitable trend in the development of AI. They also highlighted that open source makes AI research and application more transparent, fair, and credible. It allows more people to participate and contribute to the development of AI, preventing monopolization and abuse of AI. +- **Three Turing Award winners in the AI field - Andrew Ng, Geoffrey Hinton, and Yoshua Bengio - engaged in a lively debate on social media**. Their discussion focused on the U.S. government's restrictions and bans on AI technology. Andrew Ng criticized the U.S. bans, stating that they hinder the open exchange of AI and are detrimental to AI development and innovation. However, Hinton and Bengio argued that the U.S. bans are necessary controls on AI for security and ethical reasons. + +The emergence and utilization of AI technology reflect the diverse ideologies and values worldwide, and their influence on the ongoing humanitarian crisis. 
AI is not merely a technological issue, but also a political, economic, and social one that necessitates international consensus and cooperation for the creation of sustainable and equitable AI development. + +#### 3.2.2 AI Laws, Regulations and Policy Documents are emerging globally +In 2023, several laws, regulations, and policy documents related to AI were issued on a global scale. These included: +- The Interim Measures for the Administration of Generative Artificial Intelligence Services were jointly announced by seven Chinese ministries and commissions, including China's National Internet Information Office. +- The Global Initiative on Artificial Intelligence Governance was issued by the Office of the Central Committee of the Communist Party of China's Committee on Cybersecurity and Informatization. +- The Executive Order on Safe, Reliable, and Trustworthy AI issued by the U.S. White House. +- The European Parliament, the EU member states, and the European Commission agreed upon the Artificial Intelligence Act. +- The Bletchley Declaration is an international declaration signed by representatives of the governments of 28 countries and the EU. + +The documents of China's Global AI Governance Initiative and the European Union's Artificial Intelligence Act reflect the importance of promoting and protecting open source AI technology. For instance, China's initiative encourages the world to work together towards the healthy development of AI, sharing the knowledge and open sourcing AI technology. The EU's AI Act specifies that it does not apply to AI components provided under free and open source licenses unless they are part of a general base model or prohibited AI practices, or subject to transparency obligations as part of an AI system. 
+ +#### 3.2.3 Global open-source organizations are addressing new AI governance challenges +In June 2023, the Open Source Initiative (OSI) initiated the Defining Open Source AI campaign, which included online and offline global discussions and events to address the challenges of open-source AI governance. During the campaign, Kaiyuanshe actively participated in the mailing list discussions and organized the translation of the webinar series. The draft document of the Definition of Open Source AI, which has been published, consists of a preamble, a definition of Open Source AI, and a list of evaluation licenses. The document focuses on authorizing the use, study, modification, and sharing of AI systems. + +The Apache Software Foundation published Generative AI Guidelines for Contributors in June 2023. The guidelines help contributors who use AI-generated code, documents, and images for ASF projects. They recommend disclosing the AI-generated part of contributions and labeling it as "Generated-by: ". + +The China Academy of Information and Communications Technology (CAICT) released a report titled "Compilation of Trusted Open Source Large Model Cases (Phase I)" in December 2023. The report provides a comprehensive overview of China's open-source large model industry, including technical aspects, application scenarios, business models, governance, and development trends. It serves as a reference guide for developing China's large model industry and analyzes the technology ecology of open-source large models and the industry chain. + +#### 3.2.4 Open-source AI large models call for new types of licenses +Open source is becoming mainstream for AI large models, but traditional licenses can't meet their unique needs. New licenses are being explored. + +The Open Source Initiative declared Meta's LLaMa license not open source due to commercial use limitations and purpose of use restrictions. 
Falcon-40B was also challenged for using a custom license with special restrictions and had to change to Apache 2.0. Hugging Face changed TGI's license from Apache 2.0 to HFOIL due to restrictions on selling hosted or managed services on TGI. + +By 2023, Hugging Face will have almost half a million models available with different licenses, including Apache 2.0, MIT, and OpenRAIL. The OpenRAIL license is an upgrade from RAIL and has behavioral restrictions. It includes licenses for source code, applications, models, and data: OpenRAIL-S, OpenRAIL-A, OpenRAIL-M, and OpenRAIL-D. + +China's domestic standards and research institutions are actively promoting innovative AI licensing practices. In May 2023, the China Academy of Information and Communications Technology (CAICT) jointly compiled and released the "Zhiyuan Open AI Model License Version 1." This license regulates the use of models (including their derivatives and supporting materials) but does not apply to the training data of the models. In August 2023, the Shanghai Jiao Tong University Intelligent Court Research Institute, along with the Artificial Intelligence Research Institute and Shanghai Magnolia Open Source and Open Research Institute, organized a workshop on designing the framework of the Mulan-Magnolia Open Data License 2.0. The license's function is to provide an open license for AI data. In December 2023, the OpenAtom Open Source Foundation and the Magnolia Open Source Community, the OpenI Qizhi Community, and other communities jointly developed the Mulan-Qizhi Model License (Beta). The license applies to models obtained through algorithmic training and supplementary materials, including model structure, parameters, weights, etc. However, it excludes the training models' algorithms and algorithmic source code. +> **Commentary**: +> Wei Jianfan: I believe that these disputes will soon cease to exist, and as long as the law is clear, all similar problems will be solved. 
+ +#### 3.2.5 The development of China's open-source field standard is gaining momentum. +China supports open-source standards and will develop standards for open-source terminology, licenses, interoperability, project maturity, community operation, governance, and supply chain management for open-source software. + +A new national standard for evaluating the open source code security of software products was drafted in April 2023 by the National Information Security Standardization Technical Committee, led by the China Academy of Information and Communications Technology. It is now open for public comment. + +In July 2023, the Chinese Electronics Industry Standardization Technology Association (CESTA) approved three group standards related to open-source technology. These standards provide guidelines for open-source governance and project evaluation. They include T/CESA 1269-2023 Information Technology Open Source Terminology and Overview, T/CESA 1270.1-2023 Information Technology Open Source Governance Part 1: Overall Framework, and T/CESA 1270.4-2023 Information Technology Open Source Governance Part 4: Project Evaluation Model. The Chinese Research Institute of Electronic Technology (CRIET) released these standards. T/CESA 1270.4-2023 Information Technology Open Source Governance Part 4: Project Evaluation Model and three other open-source group standards have been approved and published for use. + +In September 2023, the Chinese Academy for Electronic Technology Standardization formally approved four open-source standards. The China Electronics Industry Standardization Technology Association examined and approved these standards. 
The agreed standards are T/CESA 1270.2-2023 Information Technology Open Source Governance Part 2: Enterprise Governance Assessment Model, T/CESA 1270.3-2023 Information Technology Open Source Governance Part 3: Community Governance Framework, T/CESA 1270.5-2023 Information Technology Open Source Governance Part 5: Open Source Contributor Assessment Model, and T/CESA 1291-2023 Information Technology Open Source Metadata General Requirements. The Community Governance Framework, Open Source Contributor Assessment Model, and Information Technology Open Source Metadata Requirements are among the four open-source standards that have been formally approved and released. + +In October 2023, two open-source software group standards were approved and released. These standards were titled "Open Source Software Governance Evaluation Methods Part 3: Maturity Models" and "Open Source Software Governance Evaluation Methods Part 5: Governance Tools and Platforms". The China Academy of Information and Communications Research (CAICR) led the development of these standards, and they were reviewed and supported by the China Association for Communications Standardization (CAICS). + +## 4. Open Source Security Chronicle +In today's digital age, software has become an essential element that supports the normal functioning of our society. However, as the software supply chain becomes more complex, so do the security issues. The Log4Shell vulnerability recently brought open-source security into the spotlight. Despite 2022 being touted as the "Year of Supply Chain Security," the vulnerability is still widespread, and the rate of adoption of fixes is low. As a result, the frequency of attacks in the software supply chain has skyrocketed. The broad adoption of open-source code has turned supply chain security into an existential issue. Log4Shell has made headlines as it revealed the security risks present in the open-source community. 
Moreover, other projects that are heavily reliant on open-source in the ecosystem may have a more extensive reach and more severe consequences than Log4Shell. Supply chain attacks are on a sharp upward trend, averaging 742% annual growth since 2019. Therefore, we need to focus on improving the security of open-source software. + +### 4.1 Latest trends and challenges +An analysis of the latest trends and challenges in open-source security, including the following: +- **Malware as a Service**: Hackers use open source code and tools to develop and distribute malware, creating a massive black market that threatens the security of the open source ecosystem. +- **Human Errors**: Open-source projects are vulnerable to attacks due to human errors, such as ignoring security updates, using weak passwords, and leaking sensitive information by developers and maintainers working with open-source code. +- **Supply Chain Attacks**: occur when hackers inject malicious code into open-source projects by manipulating repositories, dependency packages, or update channels, thereby compromising the reliability and trust of these projects. +- **Legal Risks**: Open source projects may face legal risks in complying with license agreements, dealing with copyright disputes, responding to policy changes, etc., which must be identified and resolved promptly. +- **Security Standards**: Open-source communities and organizations are developing and promoting some security standards and best practices, such as SLSA, OpenSSF, CII, etc., to improve the quality and security of open-source code. +- **Security Tools**: Open source projects can utilize some open source or commercial security tools, such as Snyk, Dependabot, CodeQL, etc., to detect and fix security vulnerabilities and improve security protection. 
+- **Security Education**: Open source projects need to strengthen security education and training, improve the security awareness and skills of developers and maintainers, establish a security culture and process, and prevent security risks. +- **Security Cooperation**: Open source projects must strengthen security cooperation with other open source projects, organizations, enterprises, governments, etc., share security information and resources, form a security community, and jointly address security threats. +- **Security Outlook**: The security landscape for open-source projects presents a mixed prospect. While the prevalence of increasingly intricate and severe security challenges is noteworthy, open-source projects are fortified by a sturdy and dynamic security force. + +### 4.2 Legal liability of open source security +There is an ongoing debate concerning the legal liabilities of open-source software regarding security. The prevailing argument and accompanying legislation state that the authors of open-source software bear responsibility for any vulnerabilities detected in the code. Despite being offered free of charge, the authors are expected to guarantee the quality and security of their software. Vulnerabilities can cause significant harm, such as the compromise of user data and system attacks, making it imperative for authors to fix identified weaknesses and inform users promptly and swiftly. As such, the current trend in global legislation is to hold open-source legally accountable for cybersecurity. + +- In **China**, providers of network products and services are forbidden from developing malicious programs. They must take immediate corrective action, promptly notify users according to regulations, and report to the relevant authorities if security flaws, loopholes, or risks are identified in their network products and services. Furthermore, network product and service providers must maintain ongoing security for their products and services. 
They are not permitted to terminate the provision of security maintenance within the period specified or agreed upon by the parties. If a network product or service can collect user information, its provider must obtain explicit consent and communicate this to users. Moreover, if personal information is involved, the provider must comply with the relevant laws and administrative regulations on personal information protection. +- **The EU Cyber Resilience Act (CRA)** aims to strengthen the cybersecurity of digital products in the EU by consolidating the existing cybersecurity regulatory framework. The Act imposes many cybersecurity requirements on digital products, including software. The Act is closely linked to the Directive on measures for a high common level of cybersecurity across the Union (NIS 2 Directive), the Cybersecurity Act, the Artificial Intelligence Act, and the General Data Protection Regulation (GDPR). It could become one of the most critical EU cybersecurity laws. + +### 4.3 Some important open source security incidents in 2023 + +#### 4.3.1 Log4j vulnerability resurrection +Log4j is a tool for developers to track their programs. In Dec. 2020, a severe issue let hackers control computers using Log4j. Alibaba and Amazon were affected. The Log4j team quickly fixed it in Jan. 2021 with Log4j 2.15.0. + +However, Log4j 2.15.0 has a new vulnerability, CVE-2021-44228. Attackers can exploit a Java problem by sending specific log messages. Log4j 2.16.0 turns off the Java feature in log messages to address this. Users must upgrade now and turn off unused logging. Use firewalls and intrusion detection to block malicious traffic. + +#### 4.3.2 Linux malware growth rate soars to 50% +Linux malware surged 50% to 1.9 million threats in 2022, with Trojans, botnets, ransomware and mining software used to steal data, control devices, and extort money. Infections spread through web services, email, web pages, and mobile devices exploiting vulnerabilities, weak passwords and social engineering. 
To protect against Linux malware, regularly update systems and software, use strong passwords and two-factor authentication, install reliable anti-virus software, and avoid opening suspicious links and attachments. + +#### 4.3.3 New threats to the npm supply chain: "manifest confusion" +Manifest Confusion is a security problem that affects the npm registration process. Attackers exploit this vulnerability to hide harmful code or dependencies by providing incorrect manifest information that does not match the contents of a tarball package. This security issue can affect millions of npm users and projects, potentially leading to the theft of sensitive information, execution of remote commands, spreading malware, and more. Developers and maintainers can prevent this vulnerability by using npm shrinkwrap or package-lock.json to lock down dependency versions, using npm audit, avoiding installing packages from untrusted sources or mirrors, and checking that the manifest information matches the contents of the tarball package before releasing it. + +#### 4.3.4 Electron's shocking Level 10 vulnerability! +Electron is a framework for cross-platform desktop apps. It has a significant vulnerability that lets hackers use a bad link to run harmful code. Apple and Google warned about this, but many apps have not been updated. The vulnerability is caused by an old version of Chromium. To fix it, Electron needs to use a newer version of Chromium. The Electron team has already released a new version that fixes the problem. It's important for developers to keep their software up-to-date and secure. Finally, developers should always check their code for vulnerabilities. + +#### 4.3.5 Google awards $12 million for solving 2,900 vulnerabilities +Google's Vulnerability Reward Program (VRP) paid $12M in bonuses to security researchers from 68 countries who found 2,900 vulnerabilities in 2022. The highest reward for a single vulnerability was $605,000. 
The VRP now covers Google Nest and Fitbit. + +#### 4.3.6 GitHub adds SBOM export feature to make it easier to comply with security requirements +GitHub's new feature helps developers quickly create and export software bills of materials (SBOMs) to enhance security and transparency. SBOM documents contain information about the software components and dependencies used in the codebase. The resulting SBOM can be accessed from GitHub's Security Tab and exported as SPDX or CycloneDX-format files. + +#### 4.3.7 OpenAI, Google, Microsoft and others create $10 million AI security fund +Tech companies and research organizations, including OpenAI, Google, and Microsoft, have created a $10 million fund for AI security and ethics research. The goal is to promote responsible and trustworthy AI development, prevent risks, and encourage more participation. The fund will be managed by an independent committee, which will select the most outstanding projects for funding. + +In brief, open-source software necessitates enhanced security risk governance mechanisms, including quality standards, security audits, vulnerability rewards, and shared responsibility. Similarly, open-source software necessitates more significant investment and support, including financial resources, workforce, and community engagement. The future of open-source software development relies on our response to the current situation and our ability to establish a more sustainable and secure open-source ecosystem. + +## 5. Open Source Commercialization Chronicle + +### 5.1 Early stage financing activities + +- **DBeaver, an Open Source Database Management Tool, Secures $6 Million in Angel Round Funding** + + Open-sourced in 2013, DBeaver is a free and open-source general-purpose database management and development tool based on Java and running on a variety of operating systems. Its founders formed a commercialization company in 2017 to provide enterprise-level support and develop an enterprise version.
DBeaver currently has 8 million users and more than 5,000 paying customers, including IBM, Samsung, and Moody's. + +- **Open Source Large Model Company Together Raises $20 Million in Funding** + + Open-source large model startup Together, which hopes to "lead the Linux moment in AI by providing an open ecosystem across computational and best-in-class fundamental models," has secured a $20 million seed round of funding. Together is building a cloud-based platform for running, training, and fine-tuning open source models. One of Together's first projects, RedPajama, aims to foster open-source, generative models. Together now has a 1.2 trillion token training dataset in open source, allowing for commercialization. + +- **Union AI, an open source AI and data stream orchestration platform, secures $19.1 million in Series A funding** + + Union AI provides Flyte hosting services (orchestrating ETL, machine learning workflows) and has also built Pandera (a data testing framework) and Union ML (a framework that sits on top of Flyte to help teams build and deploy models using their existing toolsets). This year it launched the Union Cloud and received $19.1 million in Series A funding led by NEA. + +- **MindsDB, an Open Source DB for AI Company, Secures $25 Million Seed Funding Round** + + MindsDB is a platform that operates in the "DB for AI" scenario, connecting data and models using an AI-Table approach. This approach turns machine learning models into virtual tables in the database, enabling users to model directly in the database. It eliminates tedious steps such as data processing and building machine learning models and accelerates AI applications. In 2023, MindsDB received consecutive funding rounds totaling nearly $50 million.
+ +- **Star Open Source LLM Company Mistral AI Raises Multiple Rounds of Funding, Ranks Among Unicorns** + + Mistral AI, founded by scientists from Meta and Google, recently released the open-source MoE large model Mixtral 8X7B, which has attracted enormous attention. Mistral AI has also completed multiple rounds of funding in the last year, securing $415 million in its most recent Series A round, and is currently valued at over $2 billion. + +- **Model Continuous Testing Validation Tool Deepchecks Raises $14M Angel Round** + + Israeli company Deepchecks is positioned in the ML continuous test validation space, which allows customers to reuse and customize components to test ML models and datasets comprehensively. Deepchecks launched an open-source version of its ML testing tool in 2020 and, earlier this year, launched a commercial version of the Deepchecks Hub. + + To date, the open source product Deepchecks has been downloaded more than 500,000 times, and its users include AWS, Booking.com, and Wix, among others. Deepchecks recently announced a $14 million angel round of funding. + +- **Endor Labs, an Open Source Component Supply Chain Security Platform, Raises $70 Million in Series A Funding** + + Endor is positioned to help organizations monitor the security posture of their development pipeline, including both reachable and exploitable risks, manage developer access to code, and keep a close eye on the secrets hard-coded into their code base. They recently secured $70 million in Series A funding led by Lightspeed Venture Partners. + +- **AutoGPT Closed $12 Million Financing Round** + + AutoGPT uses language models such as GPT-4 and GPT-3.5 to build multifunctional intelligence that can perform tasks independently and continuously improve performance. The project has been online for over 50 days and has 131k stars and 26.7k forks, making it one of the fastest-growing projects in GitHub's history.
+ +### 5.2 Mid to late stage financing activities + + +- **UK-based MLOps company Seldon secures $20 million in Series B funding** + + Seldon was established in 2014 to address the issues of deployment, monitoring, management and interpretation of AI models at the production level. Since its Series A round in 2020, installations of Seldon's open-source products have grown 400% year over year. + +- **Temporal Secured $75 Million in Funding** + + Temporal, a startup based on Cadence, Uber's open-source distributed task orchestration and scheduling engine, has secured $75 million in a new round of funding, giving it a pre-investment valuation of $1.4 billion. + +- **SAST/SCA Open Source Security Vendor Semgrep Raises Series C Funding** + + Semgrep entered the SAST space with its SAST (Static Application Security Testing) engine, which users can integrate with their CI/CD processes and code hosting platforms such as GitHub, GitLab, etc., to inspect code using Semgrep's built-in and customized rules. Semgrep open-sourced its product in 2020 and already has over 2 million users and 7.5x revenue growth in 2022 compared to 2021. + +- **French AI Research Lab Kyutai Receives $330 Million Investment, Aims to Open Source All Results** + + French billionaire and CEO of Iliad, Xavier Niel, has started an AI research lab in Paris called Kyutai. It's a privately funded non-profit organization focusing on research in artificial general intelligence. The lab has raised nearly €300 million in funding so far. Kyutai focuses on basic AI modeling, supported by top-tier computing resources in the form of Nvidia H100 GPUs from Scaleway. + +- **Open Source Platform Replicate Secures $40 Million in Series B Funding** + + Replicate, an open-source machine learning modeling platform, has announced the successful completion of a $40 million Series B funding round led by Andreessen Horowitz (a16z) to continue to enhance its open-source machine learning modeling platform.
+ +### 5.3 Mergers and acquisitions (M&A) + +- **AMD Acquires Open Source AI Software Nod.ai** + + AMD announced on its website the signing of a definitive agreement to acquire Nod.ai, which will accelerate the deployment of optimized artificial intelligence solutions on AMD's high-performance platforms and enhance AMD's open-source software strategy. + +- **Snowflake intends to Acquire Ponder to Enhance Its Data Cloud Python Capabilities** + + Ponder is a leading company that connects popular data science libraries to where the data is and maintains the widely used open-source library Modin for scalable Pandas operations. Snowflake has announced its intent to acquire Ponder to serve Python data practitioners better. + +- **Cisco announces plans to acquire cloud-native cybersecurity startup Isovalent** + + Isovalent is committed to developing two critical open-source technologies, eBPF and Cilium, that provide deep insight into operating systems and cloud-native applications. Isovalent is essential to the Cloud Native Computing Foundation (CNCF) and the eBPF Foundation. Continued community support is vital to keep these open-source projects active. + +## 6. Open Source Education Chronicle + +In the China Open Source Annual Report, a new element, "Open Source Education," has been added to its list of milestones for the year. The definition of open-source education may vary among distinct organizations. This chapter aims to provide unambiguous clarity by defining open-source education as the utilization of open-source software and open educational resources to support educational goals. **This encompasses the utilization of open-source software tools, teaching materials, and instructional resources, while promoting knowledge sharing and collaboration. 
One of the primary objectives of open-source education is to offer more inclusive and equitable educational opportunities, thereby enabling more individuals to access high-quality educational resources.** + +In the open-source education model, educational resources like teaching plans, course content, and software tools are openly available to everyone. This means that anyone can use, modify, and share them. This model is highly beneficial in fostering students' innovative thinking, collaborative skills, and problem-solving abilities. By participating in open-source projects, students can gain exposure to the latest technologies and tools in the industry. They can also get a better understanding of the actual process of software development and contribute their own strengths to the open-source community. + +At the same time, as the drafting party of the report, "Open Source Education" is not unfamiliar to the Open Source Society. Since its establishment in 2014, the Open Source Society has actively explored the integration of open source and education. 
Before formally introducing the milestones of open source education in 2023, let's review the work done by the Open Source Society in the field of open source education: + +- In 2014, the Open Source Society initiated the first series of open source on-campus activities in China—"Open Sourcers on Tour"; +- In 2017, the Open Source Society's executive committee established working groups dedicated to open-source education, such as the Open Education Group and University Collaboration Group; +- In 2018, the Open Source Society held the third China Open Source Conference (COSCon'18), which produced China's first "Open Source Education Track"; +- In 2019, the Open Source Society, in partnership with East China Normal University, established China's first "Open Source Education Fund"; +- In 2020, the Open Source Society produced the "Open Source Bootcamp" series, aiming to provide introductory training for open source education; +- In 2021, the Open Source Society invited six guests to share their insights on open-source education at the sixth China Open Source Conference (COSCon'21), and for the first time, invited open-source students from universities to discuss open-source education topics; +- In 2022, the Open Source Society actively began to explore directions related to open source education and training, such as specialized corporate open source training; +- In 2023, the Open Source Society, for the first time, set up a "Youth Open Source Education" track at the eighth China Open Source Conference (COSCon'23), inviting young students from primary and secondary schools to share their views on open source. + + +The integration of open-source principles with education has significantly deepened over the years. 
The Open Source Organization's development in "Open Source Education" has expanded the audience for open-source education from open-source communities to higher education institutions, secondary and primary schools, and even to a broader demographic of employed individuals. + +Despite this progress, there is still a global need for more skilled open-source professionals. The Linux Foundation's "10th Annual Open Source Jobs Report" reveals that 93% of employers struggle to find professionals with adequate open-source skills, and the situation is not improving. Nearly half of these employers (46%) plan to increase their recruitment of open-source talent in the next six months. Additionally, 73% of open-source professionals report ease in finding new employment opportunities to continue their open-source endeavors. + +This talent scarcity has elevated the importance of open-source education worldwide. China is actively fostering its open-source education landscape by encouraging participation in open-source community activities, soliciting contributions to open-source projects, establishing robust open-source education systems, and setting standards for evaluating open-source competencies. These efforts aim to stimulate a thriving open-source ecosystem and nurture talent. By doing so, students and professionals can gain a more profound understanding of the ethos behind open-source software, facilitating the integration of theory with practice, enhancing educational quality, and meeting the societal demand for innovative individuals. + +Lastly, let's review the significant milestones in China's open-source education journey during 2023. + +### 6.1 Open-source education has been thriving with interactive practices, project-based learning, and innovation competitions + +In 2023, China saw a significant increase in open-source educational activities. 
Some of the significant practical activities that took place include: + +- **Open Source Promotion Plan (OSPP)**: Guided by the Institute of Software, Chinese Academy of Sciences, this summer program aimed to encourage students to participate in open-source software development. In 2023, 3,475 students from 592 universities registered, and 504 students were successfully selected. + +- **GitLink Open Source Programming Summer Camp (GLCC)**: Hosted by the China Computer Federation, this event saw 341 students from 139 universities participate in 2023. + +- **The Sixth China Software Open Source Innovation Competition**: Guided by the Department of Information Science, National Natural Science Foundation of China, and hosted by CCF, this competition focuses on "bottleneck" software fields and cutting-edge technologies, with multiple tracks. + +- **The Twelfth "Kylin Cup" National Open Source Application Software Development Competition**: Guided by the China Software Industry Association, OpenAtom Foundation, the Open Source Development Committee of the China Computer Federation, and the China Open Source Software Promotion Alliance, this competition attracted 345 teams from over 60 universities. + +- **2023 OpenAtom Open Source Competition**: Hosted by the Ministry of Industry and Information Technology, the People's Government of Jiangsu Province, and the People's Government of Hunan Province, this competition aimed to unite open-source organizations, enterprises, institutions, universities, research institutes, industry organizations, and investment and financing institutions. + +- **The First China Postgraduate Operating System Open Source Innovation Competition**: Hosted by the China Postgraduate Innovation Practice Series Competitions, this event focused on open-source innovation in operating systems. 
+ +Additionally, the 2023 Open Source and Information Consumption Competition – The Fourth Industrial APP and Information Consumption Competition, hosted by the Ministry of Industry and Information Technology and other organizations, helps promote open-source education to the professional workforce. + +These activities have enhanced students’ technical abilities and promoted the spread of open-source culture and the vitality of open-source communities, making significant contributions to the development of China’s open-source ecosystem. + +### 6.2 Domestic open-source software & hardware education theory foundation is forming + +In 2023, China's open-source education sector made significant progress in practice and saw a growing theoretical foundation. Teachers from higher education institutions and open-source experts began to pay more attention to the research on open-source education theory, publishing representative articles at different teaching levels and in various directions. These studies provided cases and theoretical analyses for open-source education, demonstrating its application potential in higher education and K12 education. + +**In higher education**: Open-source education is regarded as an innovative teaching model that helps students acquire software and hardware development skills. For example, teachers from Peking University, East China Normal University, and Shanghai University of International Business and Economics have researched the application and value of open-source education in their respective disciplinary teachings. + +**In K12 education**: Open-source education is often integrated with STEM, STEAM, robotics/uncrewed aerial vehicle (UAV) education, and maker education, mainly by including open-source hardware in teaching. For instance, teachers from Meihua Middle School in Zhuhai and Langya Road Primary School in Nanjing have explored the application of open-source hardware in project-based teaching. 
+ +Additionally, the Shanghai Education Commission's Educational Technology and Equipment Center hosted a symposium on the development of educational UAV and open-source hardware course resources, showcasing the application of open-source hardware in primary and secondary education. The open-source robotics sports event at the 11th Primary and Secondary STEAM Education Conference also demonstrated cases and new trends of open-source education in science and technology education in schools. + +These activities and studies indicate that the future promotion of open-source education will differ between higher education and K12 education but will tend toward developing open-source general education and open-source software and hardware development education. Open-source education not only helps to enhance students' technical abilities but also promotes innovative thinking and teamwork spirit, contributing to the diversified development of China's education system. + +Articles exploring the integration of open-source education and higher education include: +- "Exploration of a Dual-Track Open-Source Teaching Model under Industry-Education Integration: A Case Study of Peking University's 'Open-Source Software Development Fundamentals and Practice' Course" by Jing Qi and Hui Feng. +- "Insights into the Future of Open-Source Education from the Digitalization of Open-Source Technology" by Wei Wang and Shengyu Zhao. +- "The Value and Significance of Introducing Open-Source Education into Higher Education Institutions" by Guofeng Zhang. +- "Research on Software Engineering Education Integrating Open-Source Software Ideas and Examples" by Huang Haowei. +- "Construction of a New Medical Mathematics Curriculum Group Based on Blockchain from the Perspective of Open-Source Education in Colleges" by Xiaona Wang, Dan Ding, and Ge Ban. +- "Cultivation of Innovative Software Talent in the Open-Source Ecosystem" by Tao Zhuo, Kai Wang, and Wei Ge. 
+ +Articles exploring the integration of open-source education and K12 education include: +- "Exploration of Open-Source Hardware Project-Based Teaching Practice under the STEM Education Concept: A Case Study of 'Creative Illuminated Clothing'" by Suo Fang. +- "Promoting the Education of Young Masters Based on an Open-Source Architecture Project Research Community" by Yi Dong Qi. +- "Research on Open-Source Hardware Chips for Information Technology Education" by Lizhi Xin, Zhang Xiangling, Yao Ziming. +- "Injecting New Vitality into Education with Maker and Open-Source Hardware" by Jun Xi. + + +### 6.3 Open-source education forums are gaining momentum, with the open-source and education community continuing to grow. + +In 2023, the development of open-source education in China showed a clear upward trend, as evidenced by the increased number, frequency, and quality of conferences dedicated to discussing open-source education. These conferences highlighted the influence of open-source education and fostered deep communication and collaboration between the educational and open-source communities. + +Some notable conferences and forums include: +- **2023 GAIDC Global Developer Vanguard Conference**: This international developer conference featured an open-source technology forum showcasing the global application and development of open-source initiatives. +- **The 2nd China Open Source Education Symposium (SOSEC-2) and The 3rd China Open Source Education Symposium (SOSEC-3)**: Held in Guangzhou and Shanghai, respectively, these symposia focused on the current state and future trends of open-source education in China. +- **National College New Business Open Source Innovation Education Symposium**: Held in Shanghai, this event explored open-source applications in education, particularly its integration with business education. 
+- **The 4th China Computer Education Conference**: The first Computer Open Source Education Forum was part of this conference, emphasizing the importance of open-source in computer education. +- **2023 Zhongguancun Forum – World Open Source Innovation Development Forum**: Themed around "Open Science and Open Source Education," this forum discussed the role of open-source education in scientific research. +- **GOTC 2023**: Hosted the Linux Foundation's Open Source Education and Talent Development Summit, highlighting the crucial role of open-source technology in talent cultivation. +- **2023 OpenAtom Global Open Source Summit**: The successful convening of the Open Source Education and Talent Track further promoted the discussion and practice of open-source education globally. +- **COSCon'23 The 8th China Open Source Conference**: Featured a "Youth Open Source Education" track, inviting young OpenTeen primary and secondary school students to share their experiences with open-source practices. + +Hosting these events and forums increased the influence of open-source education in academia and industry. It provided a platform for educators, students, and open-source community members to exchange ideas, promoting the sharing of open-source educational resources and the dissemination of best practices. As open-source education forums rise in prominence, integrating open-source and education is becoming a new trend in educational innovation and talent development. + + +### 6.4 The cultivation and certification of open-source talent is gradually becoming a standardized system. + +In 2023, China's open-source education sector reached a significant milestone by initiating the "Open Source Talent Competency Requirements and Evaluation Standards." This standard is being developed under the leadership of the Ministry of Industry and Information Technology's Talent Exchange Center in partnership with the OpenAtom Foundation. 
The development meeting was attended by 36 experts from various universities and companies, including Beijing University of Aeronautics and Astronautics, Beijing Institute of Technology, East China Normal University, Huawei, Baidu, Tencent, and Xiaomi, signaling the formal inclusion of open-source talent education into the national talent cultivation strategy. Establishing this standard is crucial for constructing China's open-source talent development ecosystem, as it will help promote the high-quality development of open-source software and technology by establishing a set of scientific, industry-recognized talent competency standards through research, analysis, and refinement. + +Moreover, the training of open-source educators has become an essential area of exploration. For instance, the Changsha Software and Information Technology Service Industry Promotion Association hosted the 2023 Hunan Province Higher Education OpenHarmony Faculty Training, aimed at deepening college teachers' application and understanding of OpenHarmony, enhancing their ability to teach and develop based on OpenHarmony and building a robust educational information and creation ecosystem. + +These initiatives and developments indicate that China is actively establishing a standardized system for cultivating and certifying open-source talent. This will help enhance the professional capabilities of open-source talent and promote the widespread application and innovative development of open-source technology in the education sector. As the open-source education system continues to improve, more high-quality open-source talent is expected to emerge, starting from China, contributing to the global open-source community. + + +### 6.5 Enterprises are increasingly involved in open-source education, giving rise to a new model of industry-university-research cooperation. 
+ +In 2023, Chinese enterprises significantly increased their involvement in open-source education, forging more open and in-depth partnerships with universities. These collaborations typically involve integrating real-world open-source projects into the academic setting, enabling students to engage in meaningful, high-caliber open-source initiatives rather than mere operational tasks. Here are examples of such corporate-academic collaborations: + +- **Answer Project**: This project, chosen as the capstone project for Peking University's Guanghua MBA program, allows students to participate in live open-source projects. +- **CloudWeGo Project**: Integrated into Peking University's graduate curriculum, it allows students to work on enterprise-backed open-source projects; it also collaborates with Nanjing University and Zhejiang University to foster campus partnerships and open-source talent development. +- **openKylin**: Established an academic station at Tianjin University of Science and Technology, focusing on cultivating open-source talent. +- **PingCAP**: Donated three years of partnership with the China Computer Federation (CCF) China Database Summer School, providing complete engineering practice experiments; signed a joint doctoral training agreement with East China Normal University to foster high-level talent in critical software. +- **OceanBase**: Collaborates with East China Normal University to tackle technical challenges and lead in distributed database research innovation and open-source talent cultivation. +- **StoneDB**: Completed the first intern training, attracting students from multiple renowned universities to focus on cultivating open-source database talent. +- **Tencent**: Supports open-source talent development through the "OpenAtom Campus Source Line" project and launched the RhinoBird Open Source Talent Plan for 2023, assisting in cultivating open-source talent at universities. 
+- **Shenkaihong**: Co-hosted an open-source Hongmeng talent training workshop with the Beijing Institute of Technology and established the "Open-source Hongmeng Talent Class" with multiple schools. +- **Tuowei Information**: Its subsidiary KAIHONGZhiGu was involved in the Yali Lu Gu Middle School project, which was selected as a "2023 Smart Education Excellence Case." +- **CSDC**: Collaborated with Beijing Institute of Technology and Shenkaihong to establish the first "Open-source Hongmeng Talent Class" in the Information Technology Innovation College. +- **Shenkaihong**: Collaborated with Southeast University to cultivate university open-source talent and promote the development of the OpenHarmony talent ecosystem. +- **Honghu Wanlian**: Established a national OpenHarmony (Open-source Hongmeng) intelligent terminal and IoT industry-education integration community in collaboration with multiple schools and companies. + +These partnerships offer students hands-on experience with real-world open-source projects and facilitate the exchange of knowledge and technology between enterprises and academia. Through these collaborations, enterprises gain insights into students' abilities and needs. In turn, university students can collaborate directly with industry experts, which is invaluable for honing their technical skills and professional acumen. Furthermore, these partnerships contribute to advancing and popularizing open-source technology and generating innovative contributions to the open-source community. + + +### 6.6 University open-source education programs are becoming more robust, and universities are enthusiastic about participating in open-source projects. + +In 2023, Chinese universities have made significant strides in open-source education, with many institutions advancing the cause by introducing specialized courses, the establishment of alliances, and collaborations with enterprises.
Tsinghua University, Beijing University of Aeronautics and Astronautics, Zhejiang University, Shanghai Jiao Tong University, East China Normal University, and nearly a hundred other universities nationwide have announced plans to roll out open-source software courses over the next three years. These courses will cover foundational subjects such as open-source professional technologies and digital public goods, aiding students in understanding the architecture of open-source knowledge from the ground up and accelerating the cultivation of talent in crucial software domains. Here are some specific examples: + +- **Peking University**: + - Collaborated with DouGe and GitLink to create an online practical course, "OSS Development: Open-Source Software Technology," combining theory and practice to develop students' open-source software development skills. +- **Tsinghua University**: + - Hosted a 2023 autumn and winter open-source operating system boot camp, where students honed their programming skills by writing code in Rust. +- **East China Normal University**: + - Introduced the course "OSS101: Open-Source Software Literacy," which aims to cultivate students' open-source awareness and skills. + - Led the establishment of the CCF Information System Professional Committee's Open-Source Education Working Group and created an "Institution-Course-Competition-Certification" integrated open-source talent development system to drive the growth of open-source education. +- **Southern University of Science and Technology**: + - Participated in establishing an open-source university alliance at the Qizhi Developer Conference, which is dedicated to fostering the Greater Bay Area's open-source ecosystem and talent development and has a national impact.
+- **Beijing Institute of Technology**: + - Collaborated with Shenkaihong to hold an open-source Hongmeng talent training and scientific research cooperation workshop, enhancing industry-academia collaboration and improving the quality of talent development. + +These initiatives and courses not only enrich the open-source education curriculum in universities but also boost student engagement in open-source projects. Through these practices, students gain a deeper understanding of the development process of open-source software, acquire relevant skills, and participate in the open-source community. These efforts are instrumental in nurturing high-quality open-source talent that meets the needs of modern digital economic development and promoting the popularization and application of open-source technology in China. + + +### 6.7 Diverse parties are driving the "Open Source into Campus" initiative to garner student interest. + +In 2023, one of the most noticeable activities in open-source education was the "Open Source into Campus" campaign, organized by various organizations such as the OpenAtom Open Source Foundation, the Open Source Development Committee of the China Computer Federation (CCF), the Open Source Promotion Plan (OSPP) organizing committee, and Hongshan Open Source. +- **OpenAtom Open Source Foundation** + - The OpenAtom Open Source Foundation and Tencent initiated the "OpenAtom Campus Source Tour" public welfare project. Together, they explore new paths of industry-education integration by establishing university open-source communities, popularizing open-source culture, and developing open-source curriculum systems. +- **CCF Open Source Development Committee** + - The "Open Source University Tour" series initiated by the Open Source Development Committee of the China Computer Federation was successfully held at prestigious universities like Tsinghua, Peking, Beihang, and Fudan, leaving a significant impact and achieving successful practices. 
+- **OSPP Organizing Committee** + - To enable more students to understand and participate in open-source projects deeply, the OSPP organizing committee partnered with many excellent open-source communities to launch the "OSPP Campus Tour." The OSPP Campus Tour series aims to ignite the energy and vitality of the new generation of developers, allowing more students to gain an in-depth understanding of well-known open-source technologies, projects, and communities both domestically and internationally and to popularize open-source culture in more universities. +- **Hongshan Open Source** + - The Hongshan Open Source community launched the "Hongshan Open Source University Tour" for key universities and directions, enhancing the community's influence and popularity and attracting more outstanding innovative resources to construct the open-source creation ecosystem. + +Such activities are expected to become one of the main channels through which college students can access open-source education in the future. + + +### 6.8 China's policies related to open-source education + +In 2023, while notable advancements were made in the practical application of open-source education in China, supportive policies at the national level were relatively scarce. + +Certain local governments, however, have started to recognize and foster the growth of open-source education. For instance, on December 29, 2022, the Changfeng Alliance Think Tank Base submitted "Suggestions for Strengthening Open-Source Talent Education in Beijing," which systematically addresses open-source talent education's current state and challenges. The proposal advocates for enhancing open-source talent training by the Beijing municipal government. 
As a leading hub for China's open-source ecosystem, Beijing's role in advancing open-source talent education is pivotal, contributing to the cultivation of software talent aligned with industrial demands, establishing a sustainable open-source ecosystem, and enhancing software technological innovation and supply capabilities. + +Furthermore, the "Guide for the Construction of Demonstration Software Colleges with Characteristics (Trial)" jointly issued by the Ministry of Education and the Ministry of Industry and Information Technology in 2020 has prompted universities to engage more deeply in open-source education. The guide underscores the importance of cultivating software talent with distinctiveness, exploring professional development patterns, and focusing on the specific needs of open-source talent in critical areas such as foundational software, industrial software, and emerging platforms. It also encourages cultivating vital open-source projects and gathering outstanding talent, providing robust support for industrial innovation. + +Despite the limited dissemination of policy-related messages in 2023 (which may be amid formulation), existing policy documents have already positively impacted open-source education. As we look ahead to 2024, more national-level policies are anticipated to be released, further guiding and promoting the practice of open-source education in China. + +## 7. Open source ranklists and reports summary + +Besides KAIYUANSHE, multiple media outlets, organizations, and institutions have published numerous open-source-related rank lists, reports, blue papers, and more. To provide readers with a comprehensive understanding of this topic, we have compiled a summary in this section. + +### 7.1 A few valuable reports + +- In February 2023, KAIYUANSHE released the **China Open Source Annual Report 2022**, which has four parts: questionnaire, data, commercialization, and chronicles. 
The questionnaire includes data analysis measures and reports on open-source community metrics and commercialization. X-lab Open Lab, Apache Devlake Community, and Gitee produce the data chapter. Yunqi Partners wrote the commercialization chapter and focused on promoting open-source software globally. The open source chronicles chapter comprises five parts: commercialization, security, technology, law, community, and ecology. +- In April 2023, the InfoQ Research Center released the **China Open Source Ecology Atlas 2023**. It's a user-friendly directory and map of China's open-source projects. The map includes 931 Chinese open-source projects, covering seven segments: operating systems, databases, artificial intelligence, cloud-native, big data, front-end, and middleware. Additionally, the map includes ecological organizations such as labs/institutes, open-source foundations, open-source industry alliances, developer communities, and code-hosting platforms. +- In June 2023, the China Open Source Promotion Union (COPU) collaborated with 106 organizations, including CSDN, the Institute of Software Research of the Chinese Academy of Sciences, the Open Atom Open Source Foundation, the Beijing Open Source Innovation Committee, the Open Source Society, OSChina, Peking University, East China Normal University, National University of Defense Technology, and more than 120 open source experts and volunteers. Together, they released the **2023 Blue Book of China's Open Source Development**, which provides a comprehensive overview of China's open source industry ecosystem in 2023. The book also showcases China's current open-source technology innovation and industrial development. +- In December 2023, the 2023 China Open Source Developer Report, co-authored by OSChina and Gitee, was officially released. The report is divided into three parts: Open Source Developer Event Review, 2023 LLM Technical Report, and Insight: New Open Source Trends for Chinese Developers. 
+- In December 2023, the iResearch Consulting Group published the **2023 China Open Source Infrastructure Software Industry Research White Paper**. This report examines the growth trajectory of China's open-source software by comparing and analyzing the development experiences of the domestic and international open-source software industries. The whitepaper summarizes the open source software industry chain and main drivers, analyzes the business model and value of open source software, examines the main characteristics of open source projects and all the parties involved in the industry, and presents readers with an ecological landscape of the open source industry rooted in China. +- The China Academy of Information and Communications Technology (CAICT) Trusted Open Source Team has been researching open source for an extended period. In 2023, they released a series of trusted open-source reports, which include "Panoramic Observations on China's Enterprise Open Source Governance", "Open Source Intellectual Property Rights Casebook (Copyright Chapter)", "Digital Public Goods Insight Report", "OSPO Case Compilation (Issue 2)", research reports or casebooks on open source technologies for front-ends, databases, and communications, and other research reports or case collections on open source technology for niche industries. + +### 7.2 A reference-worthy ranklist + +- **The 2023 Open Source Innovation List** is a selection activity co-sponsored by the Science and Technology Communication Center of the China Association for Science and Technology, the China Computer Federation, the China Institute of Communications, and the Institute of Software, Chinese Academy of Sciences. It is being undertaken by CSDN and evaluated by over 20 open-source experts from national associations, universities, research institutes, enterprises, open-source foundations, and industry alliances. The selection process is serious and rigorous.
+- A new initiative called **China Open Source Coding Hero** has been launched by SegmentFault, KAIYUANSHE, and X-lab Open Lab. Each year, 99 developers from China are ranked according to their contribution to open source development using the OpenRank algorithm. These developers are recognized for their valuable contributions to the open source community. +- **OSS Compass**: Released in February 2023, OSS Compass is a platform for open-source ecological health assessment (https://oss-compass.org), open to all open-source projects on GitHub, Gitee, and other platforms. The platform is jointly initiated and collaboratively developed by the National Industrial Information Security Development and Research Center, Open Source China, Nanjing University, Huawei, Peking University, the New Generation of Artificial Intelligence Open Source Open Platform (OpenI), Baidu, and Tencent Open Source. At the same time, the platform itself is an open-source project around which an open-source and open-minded community has been formed. The platform has built an open-source ecological assessment system that includes three dimensions (productivity, robustness, and innovation), covering 14 indicator models. +- **Alibaba Open Source Developer Contribution List**: a list that ranks open-source developers based on their contributions. This list uses the OpenRank algorithm. Two PhD students conducted research on the impact of this list. They analyzed statistical indicators of community projects and interviewed developers. The research provides valuable insights to the open-source community and has been included in ICSE 2024. + +### 7.3 Ranklist to watch + +- **China's Open Source Pioneers** is a list of 33 individuals recommended by Open Source Pioneers from previous years. This list is co-organized by SegmentFault and KAIYUANSHE and is based entirely on preference. The selection process starts with a simple idea: "I want to introduce this friend, an open-source person, to you."
The nominees are then voted on based on the principle: "I'd love to meet this friend, an open-source person, and I hope more people will meet this friend." This list is an excellent resource for anyone interested in open-source pioneers and is worth checking out to learn more about these individuals. +- **OSC China Open Source Project Selection List**: OSChina conducted a series of selection activities in 2021 and 2022, which included evaluating the health of Chinese open source project communities, identifying the most popular Chinese open source projects, recognizing excellent international open source projects in the Chinese community, and more. However, for some reason, these selection activities were not continued in 2023. + +### 7.4 Worthless rank list + +- There is an organization called the "International Testing Committee BenchCouncil" that claims to have created a fair and scientific process for ranking open source contributors. They have published a list claiming to be "the world's first open source contribution list." However, the list ranks Linus Torvalds, the creator of the Linux operating system kernel, only at 12th place, which seems absurd. diff --git a/en/preface.md b/en/preface.md new file mode 100644 index 0000000..7c16558 --- /dev/null +++ b/en/preface.md @@ -0,0 +1,37 @@ +--- +outline: deep +--- +# Preface + +In order to stay ahead of the curve, I made a conscious decision to write this year's introduction without relying on AI assistance. It took a lot of grit and determination, but ultimately it has been a rewarding experience. With the growing trend of people seeking AI help in various jobs, it's important to remember the value of human skill and effort. By choosing to write this introduction by hand, I'm demonstrating my commitment to the importance of human creativity and ingenuity. "I resisted the temptation to use ChatGPT and wrote this year's introduction manually. It required a lot of perseverance, but it's worth it.
It seems like the trend of seeking AI assistance for various jobs is growing in 2023." It took me a lot of "perseverance" to resist the urge to use ChatGPT to help me write this year's introduction, so I'm going to write it entirely by hand. This is actually one of the trends of 2023: in more and more jobs, people are trying to seek AI help. + + +### AI & AIGC + +In 2023, we witnessed the birth of countless open-source large models, along with numerous popular GPT-based applications. Additionally, new terms and projects like AutoGPT, LangChain, CoT, and RAG emerged in various fields such as image generation, speech generation, code generation, and more. These developments have led to significant advancements in AI technology and open-source ecology. + +In the annual open source report, drastic changes were observed from 2020 to 2022. However, in 2023, we saw a massive wave in IT technology and open-source ecology that will shape the future of the industry. + +### About Omni-Data + +This year's annual report on open source in China has a significant story to tell. For the first time, we were able to combine GitHub and Gitee data to create a comprehensive comparison and gain new insights. Some of the findings may challenge the "prejudices" that many people hold about China's open source activity and contribution. We will continue to expand our data sources to make them truly "Global." + +### How does the open source community tackle its toughest challenges? + +In July 2023, the Linux Foundation organized the Open Source Congress in Geneva, Switzerland. The congress aimed to address pressing issues confronting the open source community, such as cybersecurity, the rise of techno-nationalism, the complexity of artificial intelligence, and the growing challenge of regulatory scrutiny. The congress invited 73 open source organizations, including KAIYUANSHE and the OpenAtom Open Source Foundation, to send representatives to the meeting in Geneva.
+ +The first open source "congress" was an ambitious endeavor, and it is just the beginning. The future will require open-source practitioners worldwide to work together better to address the challenges. + +### How has the year gone for open source in China? + +Don't let statistics overshadow the bigger picture. My intuition tells me that while there is external heat, there is internal cooling. National policies, local policies, technical conferences, and community exchanges are vibrant and lively. However, the open-source community's development activity has slowed down in China and globally. + +We cannot afford to be complacent or discouraged. Instead, we must evaluate objectively and avoid being too late. Let's strive for progress, not perfection. + +As we move from 2023 into 2024, we must ask ourselves: what should we expect? Which directions should we pursue? How can we remain resilient in a rapidly changing world? Join me as we explore these questions together. + +
+Biaowei Zhuang, Executive Director, KAIYUANSHE + +January 14, 2024 +
diff --git a/en/questionnaire.md b/en/questionnaire.md new file mode 100644 index 0000000..3e43484 --- /dev/null +++ b/en/questionnaire.md @@ -0,0 +1,258 @@ +--- +outline: deep +--- +# OSS Questionnaire + +## 1. Background + +As a continuation of the tradition since the release of the China Open Source Community Survey 2015 in early 2016, at the end of 2023 we launched another annual participatory survey of Chinese open source communities, dedicated to presenting the overall state of open source development in China in a multi-dimensional manner through continued developer survey reports. Using tools such as data analysis and survey reports, we have succeeded in producing a map of China’s open-source world in 2023. + +The questionnaire addresses the multiple roles of the interviewees and aims to gain insight into community development trends at various levels. Based on their level of participation in the open source community, the respondents are divided into several roles: users, participants, contributors, maintainers, and ecosystem operators. These roles form an onion model in which each layer evolves into the next. The four role levels are defined as: + +- User: Users who have used one or more open-source products +- Participant: Users who interact with the open source community (e.g. communication with open source communities, participation in activities of open source community organizations, etc.) +- Contributor: Users who contribute substantially to the open source community (including code and non-code contributions) +- Maintainer: Users primarily responsible for the daily operations of the open source community (including project maintainer, PCC members, etc.)
+ +In addition, ecosystem operators are the users who are primarily responsible for day-to-day operations in the open source communities, at a level above the participants and collectively referred to as operators. In addition to raising basic questions for all interviewees, the questionnaire addresses several different roles for users, contributors and operators. + +The **basic information** for this questionnaire is as follows: + +- **Audiences**: Covers developers, community members, contributors, students, government and enterprise management personnel. +- **Topics**: Mainly covers personal information, work status, open source communities, developer technologies, etc. +- **Method**: Collects samples and data using online questionnaires and analyzes the data through cross-comparison +- **Channels**: KAIYUANSHE, KubeCon + CloudNativeCon + Open Source Summit China, 2023 Eighth Annual Open Source Conference in China, 2023 OpenAtom Developers Conference, 2023 Open Source Industry Ecology Conference +- **Question Type**: single-choice, multiple-choice, open-ended +- **Number of Questions**: 43 +- **Sample Quantity**: 875 + +## 2. Preview of questionnaire results + +**Characteristics of Respondents** + +- The interviewees' ages are evenly distributed, with general education above undergraduate level. Gender and regional distributions align with the geographical distribution of developers in China, covering various roles in the computer industry. + +**Open Source Participation** + +- **The activity of the open source community** is an area of particular concern for the interviewees; **Artificial Intelligence** has become a technical area of concern for the majority of the interviewees. + +**Open Source Contributions** + +- Interviewees contribute more to repositories of **Technical Base Type**; respondents are more motivated by **Communities / Honorary Motivations** and require fewer material incentives.
+ +**Community Operations Survey** + +- Most of the operators interviewed are in the open source community. Nearly half of the respondents' respective companies prioritize **the standardization and management of open source software usage**. + +**Domestic Open Source Development Survey** + +- The respondents are **optimistic about the future development of open source in China**. With regard to the evolution of artificial intelligence in open source ecology, developers generally appreciate the prospects for its application in **increased efficiency, automated testing and data analysis** and consider that **data security, transparency, ethics** are the main challenges. + +## 3. Analysis of the Questionnaire + +### 3.1 Features of the Interviewee + +First, we conduct surveys from the point of view of age, gender, academic qualifications, resident city, industry and professional identity, through which basic information about participants can be obtained, thus analysing the identity of the audience groups in open source communities. + +#### 3.1.1 Age, Gender, Education, City + +| Age | Gender | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + + +The age distribution of respondents to this questionnaire is similar to that of previous years, mainly in the 21-50 age group, with a more balanced age distribution. It is worth noting that the proportion of respondents under 21 years of age is 25.71%, a significant increase from 8.42% last year. The participation of young respondents in the table has increased considerably. + +In terms of gender, male respondents account for a higher proportion, reaching 73.37%, while females account for 25.83%. Compared to last year's questionnaire, the proportion of women and men interviewed has increased significantly and is consistent with the current lack of gender balance among developers.
+ +| Educational background | Region | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +Respondents generally have an educational background of at least a bachelor's degree; in urban distribution, the majority of the respondents are from Jiangsu, Sichuan and Shanghai, partly because our online sources of questionnaire collection are in those cities. There are also more interviewees in Beijing and Guangdong provinces, and there is a more consistent distribution of developers in the overall distribution and data sets. + +#### 3.1.2 Occupation in Industry, Profession + +| Industry | Career status | +| :----------------------------------------: | :----------------------------------------------------: | +| | | + +The majority of the respondents are in the Internet/IT/electronics/communications industry, accounting for 72.23%, indicating that the survey primarily covers the field of science and technology. + +In terms of professional status, 43.20% of respondents are students in school, followed by back-end developers, architects and academic researchers. Overall, the respondents are predominantly technical practitioners and students and cover a number of occupations in computer industries. + +### 3.2 Open source participation + +#### 3.2.1 Level of participation by open source communities + +| Role of open source communities | Time to contact open source | +| :----------------------------------------: | :----------------------------------------------------: | +| | | + +The survey shows that the vast majority of members of open source communities are users (73.37%), while close to half are participants (49.03%) and some are contributors (26.51%). + +Regarding the duration of involvement in open source, one-third of respondents have been involved in open source communities for less than a year, while nearly half have more than 3 years of experience.
+ +We cross-analyzed the question "To what extent do you think you are a member of an open source community" with interviewees' roles in an open source community. + +| Extent of Considering Oneself a Part of the Open Source Community | +| :------------------------------------------------------: | +| | + +It can be seen that there is a greater sense of belonging among the maintainers, contributors, and ecosystem operators than among participants and users in the open source community. + +The following questions were addressed to respondents who had a role in the open source community at the “user” level and above. + +#### 3.2.2 Use of Open Source Products + +| Reason for Selecting Open Source Products | Factors Influencing Choice | +| :----------------------------------------------------: | :---------------------------------------------------: | +| | | + +The main reason that users chose to use open source software is that the products are free of charge, followed by the ability to develop them further and a favourable community environment. + +In selecting open source products, participants are more focused on the level of code regulation and the activity of developers. This indicates that users are concerned not only about the functionality and quality of open source products, but also about the activity of communities and developers and the sustainability of projects. + +| Issues Encountered When Using Open Source Products | Factors Prompting Open Source Contributions | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +Among the problems encountered, the most common is the lack of documentation for the project, followed by unstable updates. + +Factors such as personal interest, community atmosphere and technological upgrading play an important role in promoting open-source contributions.
+ +#### 3.2.3 Technical Direction + +| Interested Technical Directions | Known Open Source License | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +The interviewees show strong interest in artificial intelligence, accounting for 67.43%, followed by development tools, containerization and cloud computing. + +For open source licenses, Apache is the most popular option, followed by MIT and GPL. + +#### 3.2.4 Information Exchange + +| Ways to Retrieve Open Source Products | Communication Methods with the Community | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +When searching for open source products, most people search through code-hosting platforms, technical communities or media recommendations, and search engines. + +Communication with open source communities is mainly in the form of domestic communication tools (e.g. DingTalk, WeChat, QQ, Feishu, etc.) and asynchronous communication tools (e.g. GitHub Issue, Discussion, Mail List etc.), while internationalized communication tools (e.g. Slack, Skype, Telegram, Lark and others) are also widely used.The international open source community is characterized by a predominance of asynchronous communication tools, which differ remarkably from domestic practices. + +| Frequently Used Products / Technology Community | Media to Get Open Source Information | +| :----------------------------------------------------: | :-----------------------------------------------------: | +| | | + +Interviewees are mainly engaged through a code hosting platform and open source community participation. In addition, a large number of respondents participate in open source communities through domestic technical forums. 
+ +In terms of access to open source information, video platforms and question-and-answer websites are the main options, reflecting the preference of developers for access to open-source knowledge, including through audio-visual and interactive question-and-answer sessions. + +### 3.3 Open Source Contribution + +This section's questions are aimed at respondents whose roles in the open source community are "contributors" and above. + +#### 3.3.1 Level of Open Source Contribution Participation + +| Participation in Open Source Project Activity | Time of Weekly Open Source Participation | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +One third of student developers have been involved in open-source activities such as Google Summer of Code (GSoC) and the Open Source Promotion Plan (OSPP); more than half of contributors have been involved in open source activities for more than 5 hours a week, and more than 10% of contributors have participated in open source activities for 35 hours a week, nearly reaching the standard of full-time developers. + +#### 3.3.2 Ways of Contributing to Open Source + +| Main Open Source Contribution Platforms | Commonly Used Development Languages for Open Source Contributions | +| :----------------------------------------------------: | :-------------------------------------------------------: | +| | | + +GitHub remains the preferred platform for most respondents, occupying a dominant position, followed by Gitee and GitLab. This indicates that among Chinese developers, GitHub still holds significant influence, although domestic platforms are gradually emerging. The main development languages used include Python, Java, C, JavaScript, and Go. In addition, HTML/CSS, TypeScript and others were also frequently selected.
+ +#### 3.3.3 Open Source Contribution Content + +| Main Types of Contributions | Types of Contributed Projects | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +Interviewees contribute to open source projects mainly by writing codes and documents. In addition, open source advocacy, open source community operations and facilitating community activities are also common contribution methods. + +The types of open-source project that contributed are mainly concentrated in library/middleware and common framework/infrastructure, reflecting developers' deep interest in foundational technologies. + +#### 3.3.4 Incentives + +| Incentives | Sources of Financial Return | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +Various incentives have been positively evaluated, indicating that the diversity of incentives has had a positive impact on open source participation by developers. In particular, respondents believe that incentives for honour and social interaction have a more significant positive impact on contributions. + +More than half of the developers participating in open source projects receive no financial rewards.The rest of the developers receive direct financial returns through compensation/salary, rewards/incentives, while very few developers receive financial support through advertising revenue, donations, and patent/intellectual property income. + +### 3.4 Community Operations Survey + +This section of the question is addressed to interviewees who are “operators” in the open-source community. 
+ +#### 3.4.1 Overview of Open Source Communities + +| Number of Community Users | Active Developers | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +Nearly 60% of operators belong to open source communities with fewer than 200 users, while almost 30% belong to communities with over 500 users. More than half of the operators belong to communities with fewer than 20 active developers. + +#### 3.4.2 Open Source Community Management + +| Community Management | Community Commercial Support | +| :-------------------------------------------------------------: | :-------------------------------------------------------------: | +| | | + +About half of the communities have clear governance structures and professionals responsible for day-to-day operations. At the same time, communities have generally developed clear norms and provided updated documentation to support member inclusion. + +Most open source communities have commercial support, mainly in the form of declarations and synergistic development. + +#### 3.4.3 Research on the Commercialization of Open Source Software + +| Usage of Open Source Software in Enterprise | Agreement with commercialization of Open Source Projects | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +The vast majority of businesses use open-source software, with a ratio of 5:6 between samples with clear usage requirements and regulatory norms and those lacking corresponding management standards. This indicates that while some companies emphasize standards and management when using open source software, a large proportion of enterprises are still more loosely regulated, which may be influenced by factors such as company size, industry differences, and understanding of open source software.
+ +The level of acceptance for the use of open source projects for commercialization averages 3.65, with 31.66% giving the highest acceptance ratings, indicating that most respondents hold a moderate to high acceptance of commercialization. + +### 3.5 Open Source Development Research + +#### 3.5.1 Open Source Development + +| Development of Open Source Communities | +| :----------------------------------------------------: | +| | + +Overall, the respondents generally view the future development of open source in China as positive in all its aspects. + +| Characteristics of the Continuous Development of Open Source Projects | Evaluation Indicators of Open Source Projects | +| :-------------------------------------------------------------------: | :----------------------------------------------------: | +| | | + +The respondents believe that the most important characteristic that affects the health and sustainability of an open source community is how quickly the community responds, followed by whether a continuing influx of new contributors can be transformed into long-term contributors. Demonstrating long-term sustainability is critical to successful community development. + +When evaluating open source projects, respondents mainly focus on project influence, authority, community activity and continued renewal and maintenance. This reflects developers' concerns about the overall state of health of the project at the technical and community levels.
+ +#### 3.5.2 Impact and Challenges of Artificial Intelligence on Developers and Open Source Ecosystem + +| AI Impact on Developers | AI Future Role in Open Source Communities | +| :----------------------------------------------------: | :----------------------------------------------------: | +| | | + +The survey results show that developers are more optimistic about the impact of artificial intelligence technologies on open source projects, especially in terms of greater application prospects for efficiency, automated testing, data analysis and project safety. + +| Challenges for Artificial Intelligence in Open Source Ecosystem | +| :-----------------------------------------------------------: | +| | + +In addition, issues of privacy and data security, transparency and ethics are seen as major challenges facing artificial intelligence technologies in the open source ecosystem, indicating the need to balance technological challenges and social considerations in AI technology applications. + +:::info Expert Commentary +**Jie YU**: Faced with the wave of AI, we should remain calm and confident, embrace it with a positive attitude, learn from it, and make full use of AI technology to promote the continuous development of individuals and projects. 
+::: diff --git a/index.md b/index.md new file mode 100644 index 0000000..1d67326 --- /dev/null +++ b/index.md @@ -0,0 +1,284 @@ +--- +# https://vitepress.dev/reference/default-theme-home-page +layout: home + +hero: + name: "2023 中国开源年度报告" + text: "" + tagline: 开源社联合多家单位,纵横近十年对中国开源行业的综合性报告,每年发布一次 + actions: + - theme: brand + text: 立即阅读 2023 年度报告 + link: /preface + - theme: alt + text: 往年报告 + link: https://kaiyuanshe.feishu.cn/wiki/wikcnUDeVll6PNzw900yPV71Sxd + +features: + - icon: + src: "/image/home/KaiYuanShe-logo.png" + width: 40 + height: 40 + title: 开源社 + details: 开源社(英文名称为“KAIYUANSHE”)成立于 2014 年,是由志愿贡献于开源事业的个人志愿者,依 “贡献、共识、共治” 原则所组成的开源社区。开源社始终维持 “厂商中立、公益、非营利” 的理念,以 “立足中国、贡献全球,推动开源成为新时代的生活方式” 为愿景,以 “开源治理、国际接轨、社区发展、项目孵化” 为使命,旨在共创健康可持续发展的开源生态体系。 + link: https://kaiyuanshe.cn/ + linkText: 官网 + - icon: + src: "/image/home/yunqi_partnets_logo.jpg" + width: 40 + height: 40 + title: 云启资本 + details: 云启成立于 2014 年,国内最早专注于「科技创新+产业赋能」的专研型创投机构,投资范围覆盖前沿科技、先进制造、企业软件、产业供应链科技等赛道,多次蝉联清科、投中、36 氪等「中国最佳早期投资机构 TOP 10」。作为早期领投方,云启已投资了 170 多家优秀创业公司,其中 30 多家已成长为行业领头羊企业,包括 360 数科(NASDAQ:QFIN)、英科医疗(SZ:300677)、英科再生(SH:688087)、酷家乐、百布、元戎启行、MiniMax、擎朗智能、 XTransfer、环世物流、德风科技等优秀科技公司。同时,云启持续参与共创开源生态,领投了 PingCAP, Zilliz, Jina AI, RisingWave, TabbyML 等多家开源企业,并于 2021、2022、2023 年联合开源社出品中国开源年度报告商业化篇。 + link: https://www.yunqi.vc/ + linkText: 官网 + - icon: + src: "/image/home/x_lab2017_logo.jpg" + width: 40 + height: 40 + title: X-lab 开放实验室 + details: X-lab 开放实验室定位为一个开源研究与创新的开放群体,是一群由来自国内外著名高校、创业公司、部分互联网与IT企业的专家学者与工程师所构成,聚焦于开源软件产业开放式创新的共同体。专业背景包括计算机科学、软件工程、数据科学、工商管理学、社会学、经济学等跨学科领域,长期思考并实践开源战略、开源测量学、开源数字生态系统等主题。目前已在包括开源治理标准制定、开源社区行为度量与分析、开源社区流程自动化、开源全域数据治理与洞察等方面做出了较有影响力的工作。 + link: https://github.com/X-lab2017 + linkText: GitHub 主页 +--- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

郭雪、姜宁、蒋涛、tison、卫剑钒、余杰

+ + + + + +
+ + +
+ diff --git a/open-source-milestones.md b/open-source-milestones.md new file mode 100644 index 0000000..395d2b5 --- /dev/null +++ b/open-source-milestones.md @@ -0,0 +1,790 @@ +--- +outline: deep +--- + +# 开源大事记 + +## 概述 + +中国开源年度报告的《开源大事记》篇,为什么会收录很多国际开源的新闻?因为在我们看来:这些都是中国的开源人需要关心的大事,也是正在或即将影响中国开源界的大事。 + +今年的开源大事记,可以说是来自各界的编辑志愿者们心尖上 2023 年的开源大事件。我们思考的大致脉络是: + +* 由全球 “**开源技术**” 的颠覆式创新,如人工智能与机器学习等,为主线; +* 加上地缘政治态势带来的全球冲突,直接或间接地影响了 “**开源生态**” 不问西东(或东升西变)的起伏变化; +* 从而导致了从区域、法律、贸易、社区方方面面 “**开源治理**” 的范式转移; +* 其中 “**开源安全**” 议题更是被视为重中之重; +* 挑战带来机遇,更多挑战带来更多机遇,“**开源商业化**” 已经成为显学与主流。2023 年虽然艰难,但是全球(包含中国)仍有大批开源初创企业前仆后继; +* 面对这个技术、生态、治理、商业化都发生巨变的新时代,“**开源教育**” 承先启后,成为开创新局最重要的基石。以人工智能的颠覆式创新为例,没有高瞻远瞩的科研与高教体系的坚持投入,就不会有今天的荣景; +* 最后的 “**开源榜单与报告**”,算是餐后甜点,看看 2023 年的开源榜单与报告,是否洞烛机先,预见了今后中国的开源发展? + +今年是 AI 爆发之年,所以在任何一种类别的大事记里,都少不了 AI 的身影,诸位读者不妨纵览全盘,以获取整体的印象。 + +总之,我们正处在一个 AI 即将改变世界一切运作模式的前夜,期待在明年的《开源大事记》再见! + +## 一. 开源技术大事记 + +### 1.1 人工智能与大模型 + +- **智谱 AI GLM** + +智谱 AI 开源了 ChatGLM-6B 系列,ChatGLM-6B 是一个开源的、支持中英双语问答的对话语言模型。另外,智谱 AI 开源了多模态对话模型 VisualGLM-6B(CogVLM),这个模型结合了图像处理和自然语言处理的能力,支持中文和英文对话,旨在提供更加丰富和直观的交互体验。 + +- **百川 Baichuan** + +百川在过去一年开源了多个版本大模型,包括 Baichuan-7B 。后续百川发布了 13B 模型,以及百川 2 系列模型,同步开源 base 和 chat 两个版本。后续开源了 Baichuan2-192K 大模型,上下文窗口长度高达 192K。 + +- **书生大模型体系** + +上海人工智能实验室发布全新升级的「书生通用大模型体系」,包括书生·多模态、书生·浦语和书生·天际等三大基础模型,以及首个面向大模型研发与应用的全链条开源体系。 + +- **通义千问 Qwen** + +阿里巴巴开源了通义千问 7B 模型,后续相继开源了 1.8B,14B,72B 的 base 和 chat 模型,并提供了对应的 int4 和 int8 的量化版本,在多模态场景,千问也开源了 qwen-vl 和 qwen-audio 两种视觉和语音的多模态模型。 + +- **昆仑万维天工** + +昆仑万维发布百亿级大语言模型「天工」Skywork-13B 系列,并配套开源了 600GB、150B Tokens 的超大高质量开源中文数据集。Skypile/Chinese-Web-Text-150B 数据集。 + +- **RWKV** +RWKV 作为非 Transformer 结构的大语言模型,自发布以来一直进行持续开源。在 2023 年,RWKV 发布了多个版本,并进入 LF AI & Data 进行孵化。 + +- **源 2.0** + +浪潮信息正式发布千亿级基础大模型「源 2.0」。该系列模型全面开源可商用,共包括参数值 102B(1026 亿)、51B(518 亿)、2B(21 亿)三个版本。与源 1.0 相比,源 2.0 在编程、推理、逻辑等方面均有提升。 + +- **零一万物 Yi** + +2023 年 11 月,01-AI(零一万物)发布了 Yi 系列模型,其参数规模介于 60 亿至 340 亿之间,训练数据量达到 300 亿 token。 + +- **幻方量化 DeepSeek** + 
+幻方量化旗下 DeepSeek 发布 67B 开源大模型,DeepSeek 已同时开源 7B 和 67B 的两种规模模型,均含基础模型(base)和指令微调模型(chat)。无需申请,免费商用。同时,项目团队还将训练中途的 9 个模型 checkpoints 开放下载。 + +- **蚂蚁集团开源代码大模型 CodeFuse** + +蚂蚁集团开源了 CodeFuse-13B 和 CodeFuse-CodeLlama-34B 的代码大模型,目前该模型支持多种与代码相关的任务,如代码补全、文本转代码、单元测试生成等。开源内容包括:MFT(多任务微调)框架、用于增强 LLMs 编码能力的数据集和部署框架。 + +- **Meta Llama 2** + +2023 年 7 月,Meta 公司宣布,其 Llama 2 项目已成功开源了三种不同规模的预训练模型,分别为 7B、13B 和 70B 参数版本。在预训练阶段,模型经过了 2 万亿 Token 的大规模训练。SFT 阶段,模型进一步利用了超过 10 万条数据进行微调,以提升其在特定任务上的表现。另外,Meta 还开源了基于对话数据进行 SFT 优化后的 Llama2-Chat 模型,后续 Meta 继续开源了 CodeLlama 编程语言大模型。 + +- **Mixtral 8x7B** + +2023年 12 月,Mixtral 开源了 MoE 开源模型 Mixtral 8x7B,在 Apache 2.0 许可证下可商用,Mixtral-8x7B 是一款混合专家模型(Mixtrue of Experts),由 8 个拥有 70 亿参数的专家网络组成,这种结构不仅提高了模型处理信息的效率,还降低了运行成本。 + +- **Falcon 180B** + +Falcon 180B 它是由 Technology Innovation Institute(TII)在发布的开源大型语言模型。该模型拥有 1,800 亿个参数,并使用 TII 的 RefinedWeb 数据集进行了训练。 + +- **阿拉伯语 AI 大模型 Jais 开源** + +阿联酋研究团队宣布开源阿拉伯语大模型 Jais。Jais 是一个经过 130 亿个参数预训练的阿拉伯语和英语双语大型语言模型。 + +- **微软开源视觉基础模型 Visual ChatGPT** + +微软推出开源项目 Visual ChatGPT,它将 OpenAI 的 ChatGPT 与一系列视觉基础模型(Visual Foundation Models,简称 VFM)结合起来,使得用户能够在聊天过程中发送和接收图像。这个项目旨在扩展 ChatGPT 的功能,使其不仅能够处理文本,还能够理解和生成图像,从而实现多模态的交互体验。 + +- **NVIDIA 正式开源 TensorRT-LLM** + +英伟达推出名为 TensorRT-LLM 的深度优化的开源库,能够在 Hopper 等 AI GPU 上加速所有大语言模型的推理性能。在性能测试中,英伟达以 A100 为基础,对比了 H100 以及启用 TensorRT-LLM 的 H100,在 GPT-J 6B 推理中,H100 推理性能比 A100 提升 4 倍,而启用 TensorRT-LLM 的 H100 性能是 A100 的 8 倍。 + +- **马斯克主导 X(原 Twitter)开源推荐算法** + +X(Twitter)在 GitHub 上发布了两个仓库 (main repo 、 mlrepo),其中涵盖了推荐算法在内的许多推特源代码,包括用来控制用户在 For You 时间线上看到的推文的机制。 + +- **Hugging Face 更改文本推理软件 TGI 许可证** + +Hugging Face 宣布,在最新推出的 TGI v1.0 版本中,其开源许可证将从 Apache 2.0 改为 HFOIL 1.0。HFOIL 代表 Hugging Face Optimized Inference License,是 HuggingFace 专为优化推理解决方案而设计的许可协议。 + +- **HuggingFace 开源 Rust 机器学习框架 candle** + +HuggingFace 最近开源了一款新颖的小型 Rust ML 框架——candle,运行速度极快,支持多种强大模型。它提供了对 GPU 的支持,并且具有优化的 CPU 后端,可以在浏览器中运行。Candle 还包含了多个预训练模型和示例,如语音识别模型、通用 LLM、计算机视觉模型等。 + + +- **阿里开源 AnyText** + 
+阿里开源多语言视觉文字生成与编辑模型——AnyText,AnyText对生成文字的把控可媲美专业PS,用户可自定义规划文字出现的位置,图片的强度、力度、种子数等 + +- **Jina AI 推出全球首个开源 8K 文本嵌入模型** + +Jina AI 宣布推出 jina-embeddings-v2 模型,是支持 8K(8192 个 token)上下文长度的开源产品,在功能和性能上与 OpenAI 的 text-embedding-ada-002 类似。 + +:::info 专家点评 +**郭雪**:2023 年的开源大事记中,开源大模型无疑是热度极高的话题。国内外均涌现一批开源大模型,然而开源大模型与开源软件相比,在社区形态以及风险治理等方面均有不同的特点。比如针对大模型的开源许可协议有一些使用场景限制,比如开源大模型是否还存在社区协作?产业还需要进一步理解开源对于大模型发展的意义以及发展模式。 +::: + +### 1.2 操作系统与编程语言 + +- **Linux 内核的长期支持(LTS)版本,维护期将从六年变回两年** + +本次调整是继 2017 年,Linux 内核 LTS 版本的维护期从两年改为六年后,又调整回去的一次操作。针对本次调整,《Linux 每周新闻(Linux Weekly News)》执行主编 Jonathan Corbet 表示:「因为人们不使用它们,所以维护(旧内核)那么长时间真的没有意义。」 + +- **印度国防部自研 Linux 发行版 Maya OS,全面替代 Windows** + +印度国防部宣布对其网络安全系统进行重大改革,计划采用一种名为 Maya 的 Linux 发行版,取代其所有联网计算机中的 Windows 操作系统。此举是为了应对日益增长的恶意软件和勒索软件攻击威胁。旨在促进自主创新,减少对外国软件的依赖。 + +- **红帽宣布 CentOS 7 和 RHEL 7 将在 2024 年 6 月 30 日结束支持** + +红帽宣布结束 CentOS 7 和 RHEL 7 的支持,也将不再公开 RHEL 的完整源码,而是只提供补丁和更新,同时要求 RHEL 的下游发行版(如 CentOS、Rocky Linux、AlmaLinux 等)必须在 30 天内重新编译和发布自己的版本,否则将失去对 RHEL 的兼容性和支持。 + +红帽后续补充声明,CentOS 社区不会消失。社区贡献者和 CentOS 用户将继续围绕作为 CentOS Stream 项目组成部分的开源 Linux 发行版展开合作。 + +- **谷歌开源浏览器项目 Chromium 宣布使用 Rust** + +谷歌发布博客宣布将支持在 Chromium 中使用来自 C++ 的第三方 Rust 库,计划年底前将 Rust 代码包含到 Chrome 二进制文件中。还表示 Rust 作为 Mozilla 开发的编程语言,在提供安全性的同时还具有高性能,最初专门被用于编写浏览器,因此 Chromium 之类的开源操作系统依赖这项技术也非常合适。 + +- **开源操作系统 openKylin 1.0 正式发布,已支持 Arm、RISC-V** + +openKylin 0.9 首次支持 Arm、RISC-V。openKylin 1.0 版本默认搭载 6.1+5.15 双内核,完成 20+ 操作系统核心组件自主选型升级,并新增许多新特性,修复超千个 bug,进一步提升系统整体稳定性和兼容性,为用户带来更好体验。 + +- **华为正式发布 HarmonyOS 4** + +华为正式发布 HarmonyOS 4(鸿蒙 4)操作系统。据悉,全新的 HarmonyOS 4 在隐私安全、AI 大模型能力和个性交互等方面有全新突破。 + +- **飞致云开源 1Panel** + +1Panel 是一款现代化、开源的 Linux 服务器运维管理面板,为用户提供免费的服务器搭建与管理资源服务。 + +- **亚马逊云科技开源特定语言 Cedar** + +亚马逊云科技开源了他们用来定义策略访问权限的领域特定语言 Cedar。Cedar 已集成在 Amazon Verified Permissions 和 AWS Verified Access 中,还可以通过 SDK 和语言规范将Cedar 直接集成到应用程序中。 + +Cedar 可以在应用程序代码之外定义访问策略,这种分离使得它们能够独立地进行编写、分析和审计。Cedar 支持基于角色的访问控制(RBAC)和基于属性的访问控制(ABAC)。 + +- **微软发布 Guidance 语言** + +微软推出了一种名为 Guidance 
的领域专属语言,旨在增强开发人员管理当代语言模型的能力。这个新框架将诸如生成、提示和逻辑控制等任务集成到一个统一的开发流程中。这门编程语言使开发人员能够「将生成、提示和逻辑控制组织到一个连续的流中,从而与语言模型实际处理文本的方式相匹配」。 + +它可以与 Hugging Face 模型等提供程序无缝集成,并集成基于智能种子的生成缓存系统和令牌修复,从而优化提示边界并消除词汇切分过程中的偏见。 + + +### 1.3 硬件技术与物联网 + +- **中国支持建设人形机器人开源社区** + +2023 年 10 月,中国工业和信息化部印发《人形机器人创新发展指导意见》,提出“建设人形机器人开源社区,推进开源基金会能力建设,加强对重点企业开源项目的支持力度,汇聚全球开发者协同创新。” + +- **斯坦福大学发布开源机器人 Mobile ALOHA** + +2023 年 3 月,斯坦福大学发布开源机器人 Mobile ALOHA(A Low-cost Open-source Hardware System),可通过远程操作执行精细任务,到 2023 年年底,已可以通过联合训练实现简单任务的自主操作。 + +- **Tesla 开源初代 Roadster 跑车设计和工程细节** + +马斯克在社交平台上发文表示,特斯拉 (Tesla) 初代 Roadster 跑车的设计和工程细节 “完全开源”,并发布了所有人都可以访问的研发文件。 + +- **openKylin 正式加入 RISC-V 基金会** + +openKylin 社区宣布正式加入RISC-V 基金会,成为其产业联盟成员,为 RISC-V 生态建设贡献更多力量,构筑操作系统与 RISC-V 架构软硬件生态协同发展。 + +- **阿里平头哥开源玄铁 RISC-V 系列处理器** + +平头哥开源了玄铁 RISC-V 系列处理器,并开放系列工具及系统软件。这是系列处理器与基础软件的全球首次全栈开源,将推动 RISC-V 架构走向成熟,帮助 RISC-V 软硬件技术加速融合发展,推动创新落地。 + +- **AMD 开源了 FSR** + +AMD 在 MIT 许可证下开源了 FSR(FidelityFX Super Resolution)3,与英伟达 DLSS 竞争的上采样技术,但与 DLSS 不同的是,它不依赖私有的 CUDA 核心,而是基于软件。 + +- **百度开源旗下消息中间件 BifroMQ** + +百度开源旗下 MQTT Broker 消息中间件 BifroMQ,该消息中间件由 Java 实现,特点是「高性能、分布式」,BifroMQ 采用 Serverless 架构,无缝集成了原生的多租户支持,该消息中间件源自百度物联网团队多年技术积累,旨在支持构建大规模的物联网设备连接和消息系统。 + + +### 1.4 Data Infra + +- **DragonflyDB 1.0 正式发布** + +DragonflyDB 是一个现代化的开源内存数据库,兼容 Redis 和 Memcached API,迁移时无需修改任何代码,可作为两者的替代方案。近日,DragonflyDB 正式发布了 1.0 版本,开发团队称已支持在生产环境使用,Dragonfly 1.0 完全支持 Redis 最常见的数据类型和命令,以及快照、主从复制和高可用等特性。 + +- **FerretDB 1.0 正式发布** + +号称 MongoDB 开源替代品的 FerretDB 1.0 正式发布,FerretDB 希望将 MongoDB 数据库的工作负载带回其开源的本源,使 PostgreSQL 和其他数据库后端能够运行 MongoDB 工作负载,保留 MongoDB 现有生态所提供的机会。 + +- **Apache Doris 2.0.0 版本正式发布** + +Apache Doris 2.0.0 版本于 2023 年 8 月11 日正式发布,有超过 275 位贡献者提交了超过 4,100 个优化与修复。在 2.0.0 版本中,Apache Doris 在标准 Benchmark 数据集上盲测查询性能得到超过 10 倍的提升。 + +- **Apache SeaTunnel 毕业成为 Apache 顶级项目** + +这是首个由国人主导并贡献到 ASF 的大数据集成领域的顶级项目。Apache SeaTunnel 原名 Waterdrop ,在 2021 年 10 月更名为 SeaTunnel 并申请加入 Apache 孵化器。SeaTunnel 是一个非常易于使用的、超高性能的分布式数据集成平台,支持海量数据的实时同步。 + +- **阿里云开源的图计算引擎 
GraphScope 性能登顶权威榜单** + +国际权威图基准测评「LDBC SNB Interactive」 榜单公布最新结果,阿里云开源的图计算引擎 GraphScope 登顶并打破榜单历史纪录,其单节点执行图数据库查询的吞吐率超过 30000 QPS,性能达此前纪录保持者的 2 倍。 + +- **百度开源自研高性能检索引擎 Puck** + +百度宣布在 Apache 2.0 协议下开源自研检索引擎 Puck,这也是国内首个适用于超大规模数据集的开源向量检索引擎。 + +- **字节跳动开源 ByConity** + +字节跳动将 ByteHouse 内核向社区开源为 ByConity,并正式官宣发布 0.1.0 版本。 + +ByConity 定位为开源的云原生数据仓库,采用 Apache 2.0 许可协议,基于 ClickHouse 内核,但采用了存储计算分离的全新架构,支持多个关键功能特性,如存储计算分离、弹性扩缩容、租户资源隔离和数据读写的强一致性等。 + +- **阿里开源多数据库客户端工具 Chat2DB** + +Chat2DB 是一款开源免费的多数据库客户端工具,支持 Windows、Mac 本地安装,也支持服务器端部署,web 网页访问。和传统的数据库客户端软件 Navicat、DBeaver 相比,Chat2DB 集成了 AIGC 的能力,能够将自然语言转换为 SQL,也可以将 SQL 转换为自然语言,可以给出研发人员 SQL 的优化建议。 + +- **ApeCloud 开源 KubeBlocks** + +KubeBlocks 是一个开源的在 K8s 上运行和管理数据基础设施的系统软件,旨在帮助开发人员、SRE、平台工程师在企业中部署和维护专用的 DBPaaS,并支持多种公共云和私有云环境部署。KubeBlocks 是目前 CNCF Cloud Native LANDSCAPE 收录的唯一的开源多引擎数据 / 数据库管理系统项目。目前已支持 32 种数据库,包括 MySQL 、PG 、MongoDB 、Redis 、Kafka 、Pulsar 等等。 + +### 1.5 云计算与基础软件 + +- **DragGAN 开源一天获得 2 万星标** + +DragGAN 是由 Google 的研究人员与 Max Planck 信息学研究所和麻省理工学院 CSAIL 一起开发的项目,是一个非常直观的图像编辑工具,用户只需要控制图像中的像素点和方向,就可以快速调整照片主体的位置、姿态、表情、大小和角度等。 + +- **LLMOps 平台 Dify.AI 代码完全开源** + +LLMOps 平台 Dify.AI 宣布 46,558 行代码完全开源,并临时决定将开源协议从 AGPL 放宽到 Apache 2.0。 + +- **华为开源跨端、跨框架、跨版本企业级应用前端组件库 OpenTiny 及高性能服务网格 Kmesh** + +OpenTiny 是华为云开源的 Web 应用前端开发套件,涵盖 Vue2/Vue3/Angular 多技术栈,拥有主题配置系统/中后台模板/CLI 命令行等工具库。 + +Kmesh 高性能服务网格,是通过架构创新为开发者带来全新网格性能体验,实现 OS 原生的服务网格数据面能力,基于 eBPF + 可编程内核技术,将流量治理下沉 OS,大幅提升网格服务的访问性能。 + +- **百度智能云开源发布千帆 SDK 版本** + +百度智能云正式发布 Python SDK(简称千帆 SDK)版本,并全面开源,企业和开发者可以免费下载和使用。 + +- **火山引擎自研通用多媒体处理框架 BMF** + +火山引擎正式开源 BMF(Babit Multimedia Framework,八比特多媒体处理框架)。BMF 是火山引擎自研的一套通用多媒体处理框架,能够提供简洁易用的跨语言接口、灵活的调度和扩展性。 + +以模块化的方式动态扩展、管理和复用视频处理的原子能力,以 Graph 的方式构建高性能的多媒体处理链路,帮助多媒体用户便捷、高效的将项目落地于生产环境。 + +- **字节跳动发布并开源 Rspack** + +Rspack 是由 ByteDance Web Infra 团队孵化的基于 Rust 语言开发的 Bundler,拥有高性能、兼容 Webpack 生态、定制性强等多种优点,目前 Rspack 已经完成了 Webpack Loader 架构的支持。 + +## 二. 
开源生态大事记 + +一个有意思的现象是:如果在开源圈子里发生了什么好事情,多半应该写在商业篇。而如果出现了一些糟心的事情,多半就可以归入生态篇。当然,也不仅仅是坏事,也有一些好消息,以及各国的政策都会对开源生态产生深远的影响。 + +### 2.1 各大厂纷纷裁撤开源人员 +从1月初,就陆续传出谷歌、GitHub 与 GItLab 裁员的消息,甚至包括红帽这样的公司也在裁员,然后就是国内的各个大厂,也不断的有裁员的消息隐隐约约的披露出来。虽然咱们的这个大事记,主要关注开源生态与开源人的境遇,但是客观的说,大厂的确不是专门要裁开源人才的。只不过,一旦裁员开始,企业内部的开源人员,就会显得很 “可疑”,会被人追问:你们到底为公司创造了什么价值?而这个问题,始终不容易被严肃的、正面的回答! + +### 2.2 著名开源大佬生计艰难 +接下来的新闻,就更加令人唏嘘了。谷歌裁掉的一万二千人,被称之为 “Golden 12K”,其中就有一些著名的开源大佬。比如:在 19 年前创立了 Google OSPO 的 Chris DiBona,Samba 的联合创始人,61 岁的 Jeremy Allison,无奈发推表示,「刚从 Google 被解雇。如果有人需要 SMB 1/2/3 协议或者开源经验的,我很感兴趣」。 + +还有一些著名开源人的遭遇,就更惨了,咱们简单罗列一下标题吧: +- 《开源框架 NanUI 作者转行卖钢材,项目暂停开发》 +- 入狱 10 月、网暴不断,并挣扎在温饱线!超 90 亿次下载的开源项目,背后是这样的 9 年》 +- 《因资金短缺,全职开发者自述:这款开源软件可能没有未来了!》 +- 《因躁狂症失业,知名开源项目作者 “在线求打钱”》 +- 《背负着整个现代网络,却因 “缺钱” 放弃开源,core-js 负责人痛诉:“免费开源软件的根基已经崩塌了”》 +- 《资金严重短缺,又一流行开源项目宣布停止功能开发》 + +真的是 “闻者伤心、见者落泪”。去年的开源大事记,我们还在谈 “个人英雄主义的黄昏”。如今,这个趋势已经越发明显了。 + +:::info 专家点评 +**卫剑钒**:如果是抱着玩的心态玩开源,就很好,不用考虑钱的问题。如果是生计问题还没有解决,就不要全身心投入开源,当个业余爱好就好。因为开源本身并不是用来赚钱的。 +::: + +### 2.3 知名开源项目陆续停止开发 + +在 2023 年,国内外都有一些著名的开源项目,宣布停止开发,原因各不相同。 + +最离谱的,大概要数 AetherSX2,一款 Android 平台上最好用的 PlayStation 2 模拟器。开发者因为遭受了 "无休止的冒充、投诉、无理要求,甚至是死亡威胁",只能无奈,宣布停止开发。 + +最令人不忍的,则是 aardio,一门专注于桌面软件开发的编程语言,作者因妻子患癌,宣布再无精力维护项目。 + +另外还有一些常见原因,比如缺钱的、开放商倒闭的:Touca、libjpeg-turbo,还有就是开发者丧失兴趣,不再有精力维护的:Peek、wangEditor、lodash,还有因为技术发展,新时代淘汰了旧技术的:魔趣 (Mokee),还有一系列不再维护的老版本等等。 + +### 2.4 自由软件基金会的 40 年风雨历程 + +1983 年 9 月 27 日,Richard Matthew Stallman(简称 RMS)宣布开发类 Unix 自由软件操作系统的「GNU 计划」,并借此发起自由软件运动。到了 2023 年,自由软件基金会也发布了一篇文章,庆祝 GNU 和自由软件运动四十年。 + +FSF 执行董事 Zoë Kooyman 表示,GNU 不仅仅是基于自由软件的最广泛使用的操作系统,也是指导自由软件运动四十年的哲学理念的核心。他还说道,我们希望四十周年纪念能够激励更多黑客加入 GNU,实现在全世界范围内创建、改进和共享自由软件的目标。 + +但是,也是在 2023 年 4 月,有人发文称,历时近 40 年,自由软件基金会 (Free Software Foundation,FSF) 正在走向消亡。作者认为 “FSF 没有重视起传播自由软件理念,开发、发布和推广 copyleft 许可证,监督自由软件运动的健康发展 这几个核心理念的发展,同时还分心将资源投入到了其他的闲散工作中”。 + +事实上,我们现在的确更多的在谈论开源软件,而不是自由软件了。那么,到底是 “自由软件运动” 已经完成了自己的历史使命,还是有可能通过改革,重振旗鼓呢? 
+ +### 2.5 开源社区老龄化现象 + +开源社区老龄化,应该是一个不可回避的现象了。甚至连一向脾气火爆的 Linus,都开始收敛脾气,谈及 “内核社区老龄化的问题了”。Postgres 社区的老龄化问题也比较严重,主力开发已经 68 岁了。还有 Vim 之父 Bram Moolenaar 因病逝世,以及 GNU 自由软件项目贡献者 Thien-Thi Nguyen 去世的消息,陆续传来。我们应该如何看待“老龄化”这样的现象呢? + +其实,我们还应该看到,更多的年轻人,加入了开源社区,只是他们往往都选择加入了一些更加新奇有趣的年轻项目,而不是那些历史悠久的老牌项目罢了。 + +也许我们真正应该思考的是:那些老牌开源项目,真的必须一直活跃、一直不断的发布新版本吗? + +### 2.6 来自中国开源的一些好消息 + +也不能全都是谈坏消息,毕竟在中国的开源社区,还是有不少好消息的,比如在 4 月份的时候,就已经有官方报道:《我国开源软件开发者数量突破 800 万》。 + +在 2023 年 1 月,Apache Linkis、Apache Kyuubi、Apache bRPC;2月,Apache EventMesh;6月,Apache SeaTunnel、Apache Kvrocks,陆续正式毕业成为 Apache 软件基金会顶级项目。2 月,Jina AI 正式将 DocArray 捐赠给 Linux 基金会,Paralus 正式成为 CNCF 基金会的沙箱项目,7月 Istio 项目正式从 CNCF 毕业。 + +而 openKylin 正式加入 RISC-V 基金会、华为成为中国首个 PyTorch 基金会 Premier 会员、姜宁再度当选 2023 年Apache 软件基金会董事,则表示我们依然在积极的加入并参与国际开源生态,并不断发挥着重要的作用。 + +在 2023 年 2 月,继 ALC(Apache Local Community)北京、深圳之后,ALC 还成立了西安分站。与此同时,开源社也启动了 KCC(Kaiyuanshe City Community)计划,到年底时,已经发展了包括北京、长沙、成都、大连、杭州、南京、广州、上海、深圳、新加坡和硅谷,共十一个城市。 + +2023 年 3 月,继开放原子开源基金会之后,中国的第二个开源基金会 “天工开物基金会” 在重庆正式成立。后来也陆续发起了 “SigStore 中国社区”、“开源创新教育联盟” 等组织,目前已有三个开源项目正式捐赠给天工开物开源基金会。期盼未来国内有更多的优质基金会成立,立足中国、贡献全球。 + +**2023年度开源相关大会搜集表** + +* 2月 + * 深圳:首届开源鸿蒙大会 +* 3月 + * 北京:首届 OSPO Summit 开源管理办公室峰会 + * 北京:Dev.Together 开发者生态峰会 +* 4月 + * 苏州:移动云大会-开源主题活动论坛 + * 上海:openEuler Developer Day +* 5月 + * 上海:全球开源技术峰会(GOTC) +* 6月 + * 北京:智源大会-AI 开源论坛 + * 北京:开放原子全球开源峰会 + * 北京:第 18 届开源中国开源世界高峰论坛 +* 7月 + * 北京:中国互联网大会-开源供应链论坛 + * 台北:COSCUP 開源人年會 +* 8月: + * 上海:世界人工智能大会-开源学习论坛 + * 北京:CommunityOverCode Asia 2023 阿帕奇软件基金会亚洲大会 +* 9月 + * 上海:KubeCon + CloudNativeCon + Open Source Summit + * 上海:GOSIM(全球开源创新大会) + * 上海:外滩大会-开源论坛 + * 北京:OSCAR 开源产业大会 +* 10月 + * 武汉:开源新疆界:天工开物多元合作峰会 + * 长沙:CCF 中国开源大会 + * 长沙:1024 程序员节 + * 成都:COSCon 第八届中国开源年会 +* 12月 + * 北京:OpenInfra Days China 2023 开源基础设施开发者日 + * 三亚:开源计算机系统大会 + * 北京:操作系统大会&openEuler Summit + * 无锡:开放原子开发者大会 + * 上海:开源产业生态大会 + +### 2.7 各国政策对于开源生态的影响 + +谈到开源生态,就不得不提到各个国家、地区制定的开源相关政策,都会对开源社区、商业、生态产生全方位的影响。简单的归纳,可以分为以下几类: + +- **政府对于开源的扶持政策**,在 2023 年 7 月的报道中,有研究就发现 
“英国科技总增值 27% 来自开源,价值达 135.9 亿英镑”,中国更是从中央到地方,都有一系列的政策出台。有专门扶持特定开源项目的(深圳),有定点资助特定基金会项目的(北京),也有推动开源技术与特定产业整合的,不一而足。具体会对开源产业与生态产生何等程度影响,我们在今后的几年拭目以待。 +- **开源成为国际间竞争的武器**,无论是 Github 阻止来自俄罗斯公司的开发者贡献,还是有美国议员,提议在 RISC-V 领域限制中国的发展,以及各种各样已经出台或尝试制定的 “限制出口” 政策,都让下面这则路透社的报道,显得迫在眉睫:《开源软件成为贸易战的重要环节》! +- 围绕**开源安全,政策层面**也有不少的动作,无论是美国、欧盟还是中国,都出台了一系列围绕 “开源安全”、“AI 合规” 相关的法案与法规。这也令开源社区喜忧参半,喜的是安全领域越来越受到政府重视,而忧的是不合理的政策法规,可能会束缚开源技术的发展。 + +## 三. 开源治理大事记 + +开源治理大致可分为社区治理、项目治理和风险治理。而风险治理里包含了多种风险,如伦理道德与社会风险、法律合规风险(包含许可证)、供应链风险、安全风险等领域。由于开源安全的特殊重要性,我们单独整理了开源安全大事记,作为本文的第五部分。 + +2023 年是人工智能井喷的一年,是全球人工智能大神们争论是否应该限制人工智能发展速度的一年,是全球主要强国或区域(包含欧盟、美国和中国)彼此博弈同时着重立法规范人工智能的一年,更是开源与人工智能交会,试图定义开源人工智能最关键的一年。 + +全球开源基金会与组织忧心忡忡地举办了多次线上线下的交流与讨论,试图唤起全球政策制定及立法者摒弃技术民族主义和地缘政治恶意,携手合作,透过开源,共同面对人工智能新范式的挑战。然而,全球碎片化开源社区的声音,以及亚洲(尤其是中国)的政策制定者的影响力,显然还有待加强。 + +因此,今年我们在开源人工智能治理相关的大事件加重了分量。限于篇幅,除了传统的社区治理和风险治理大事件,众多项目治理的事件则揉入社区治理和风险治理章节里,不再单独列出。 + +### 3.1 社区治理 + +#### 3.1.1 Rust 社区相关争议 + +Rust 社区在 2023 年经历了一系列的社区危机和治理变革。以下是一些主要的事件和结果: +- Rust 团队在经历了一段时间的内部分歧和争议后,宣布建立了一个新的领导委员会,将权力下放给各个工作组,并公布了一个新的治理模型草案,旨在提高 Rust 项目的透明度、包容性和协作性。但是不久之后,Rust 社区管理再次出现 “内讧”,一些外部专家和贡献者遭到了一些核心成员的排挤和攻击,导致他们离开了 Rust 项目,同时一些核心成员也因为不满和压力而主动请辞,造成了 Rust 项目的人才流失和管理混乱。随后,Rust 内部的冲突和分裂达到了顶点,一些不满 Rust 现有的设计和方向的开发者宣布从 Rust 分叉出一种新的编程语言,名为 Crab,声称 Crab 会更加忠于 Rust 的初衷和理念,更加灵活和高效。 +- Rust 基金会拟定了一份新的商标政策,规定了 Rust 商标的使用范围和条件,引发了社区的不满和反对,认为这会限制 Rust 生态系统的发展和创新,影响数百个项目的名称和标识。随后,Rust 基金会针对商标争议发表了一份声明,深表歉意,承认自己在沟通和咨询方面的不足,表示愿意重新审视和修改商标政策,与社区进行更多的对话和协商。 +- Rust 社区管理的问题再次升级,RustConf 的组织者在没有征求意见和通知的情况下,撤换了一些原定的主讲人,引发了社区的强烈反对和抗议,一些知名的 Rust 开发者和演讲者宣布退出 RustConf,甚至退出 Rust 社区。 +- Rust 语言的创始人 Graydon Hoare 在接受采访时表示,他对 Rust 社区的冲突和分裂感到无奈和沮丧,他认为 Rust 已经偏离了他最初的设想和目标,他已经无法控制和拯救 Rust 的局面,他希望社区能够自己解决问题,不要再打扰他。 + +虽然 Rust 语言在 2023 年经历了一些社区危机和治理变革,但也公布了 2024 年的路线图,重点涉及降低学习门槛、扩展生态系统和完善开发流程三个方向。 + +Rust 语言的设计团队表示,他们的目标是简化程序,使开发者只需处理其领域的固有复杂性,不再需要处理 Rust 的意外复杂性,同时也赋予库作者更多的权力和灵活性,以满足用户的需求和创新。 + +另外,一些观察者认为,Rust 语言在 2021 年证明了自己的稳定性、性能和生产力,正在朝着易用的方向发展。相信随着学习成本、使用成本的进一步降低,Rust 将迎来爆发式增长。Rust 
语言不仅是为应对当今挑战而设计的语言,也是为应对未来挑战而设计的语言,它对安全性、并发性和性能的关注、日益广泛的应用显示 Rust 语言将会继续存在,但是社区治理仍然是首要解决的问题。 + +#### 3.1.2 红帽相关争议 + +红帽公司在 2023 年引发了一场开源界的风波,涉及到其旗下的 RHEL(Red Hat Enterprise Linux)和 CentOS(Community Enterprise Operating System)两个 Linux 发行版的源码发布和许可问题。以下是一些主要的事件和结果: + +- 红帽公司宣布将不再公开 RHEL 的完整源码,而是只提供补丁和更新,同时要求 RHEL 的下游发行版(如 CentOS、Rocky Linux、AlmaLinux 等)必须在 30 天内重新编译和发布自己的版本,否则将失去对 RHEL 的兼容性和支持 。这一举措引发了开源社区的强烈反应,认为红帽公司背叛了开源的精神和原则,试图通过限制 RHEL 的源码访问,挤占开源的份额,实现自身的盈利目的,同时也给 RHEL 的下游发行版造成了巨大的困难和压力 +- 红帽公司对此做出了回应,表示他们并没有违背开源的承诺,而是为了保护 RHEL 的品牌和质量,防止一些不良的行为和滥用,同时也为了鼓励更多的用户和开发者直接使用 RHEL,享受其提供的服务和支持。 +- CentOS 作为 RHEL 的最大的下游发行版,受到了最大的冲击,它的生态和社区面临着分裂和衰落的危机,一些用户和开发者纷纷转向其他的 Linux 发行版,如 Debian、Ubuntu、Fedora 等,认为 CentOS 已经失去了其存在的意义和价值。 +- 甲骨文和 SUSE 两家公司趁机出击,对红帽公司进行了嘲讽和挑衅,表示他们将继续支持和维护 RHEL 的下游发行版,甚至投入了大量的资金和人力,创建了自己的 RHEL 分支,如 Oracle Linux 和 SUSE Linux Enterprise Server,试图抢占 RHEL 的市场和用户。 +- 红帽公司再次发表了一份声明,解释了他们为什么要改变 RHEL 源码的发布策略,称他们是为了提高 RHEL 的安全性、稳定性和可靠性,同时也为了促进 RHEL 的创新和发展,他们表示他们仍然尊重和支持开源社区,欢迎更多的合作和反馈。 + +### 3.2 风险治理 + +#### 3.2.1 伦理道德与社会风险 + +人工智能技术的发展和应用引发了一些伦理道德和社会风险的争论和关注,涉及到人类的安全、自由、隐私、责任等方面。以下是一些主要的事件和观点: +- **一份由马斯克、霍金等知名人士和机构签署的公开信**,呼吁国际社会暂停开发和使用致命的自主武器,以防止人工智能引发的战争和暴力。而周鸿祎则表示,不发展人工智能才是最大的不安全,因为人工智能可以帮助人类解决很多问题,而且人类可以通过法律和监管来控制人工智能的使用。 +- **一份由 AI 教父 Geoffrey Hinton 与 OpenAI CEO Sam Altman 及其首席科学家 Ilya Sutskever 等近 400 名 AI 领域的专家和学者联合签署的 22 字的声明预警**:AI 可能灭绝人类!这份声明警告:人类如果不对 AI 加以控制,AI 可能会超越人类的智能和能力,甚至会威胁到人类的生存。他们呼吁建立一个全球的 AI 监督机构,以确保人工智能的安全和可控。 +- **全球多个开源基金会和组织在日内瓦举行了一场国际会议**,探讨了 AI 和开源的关系、挑战和机遇。会议认为,开源是促进 AI 创新和合作的重要途径,也是保障 AI 伦理和社会责任的有效手段。此外,全球众多有识之士指出,开源是 AI 发展的必然趋势,开源让 AI 的研究和应用更加透明、公平和可信,也可以让更多的人参与和贡献 AI 的进步,从而避免 AI 的垄断和滥用。 +- **AI 领域的三位图灵奖得主,即吴恩达、Hinton 和 Bengio 在社交媒体上的一场激辩**,主要围绕美国政府对 AI 技术的限制和禁令展开。吴恩达批评美国的禁令阻碍了 AI 的开源和交流,损害了 AI 的发展和创新,而 Hinton 和 Bengio 则认为美国的禁令是出于安全和伦理的考虑,是对 AI 的合理管控。 + +AI 技术的发展和应用反映出了不同的意识形态和价值观的分裂,以及这些差异对全球人道危机的影响。AI 技术的发展和应用不仅是技术问题,也是政治、经济和社会问题,需要在全球范围内建立共识和合作,以实现 AI 的可持续和公正的发展。 + +#### 3.2.2 全球AI法律法规政策文件频出 + 
+2023年,全球范围内围绕AI出台了众多法律法规政策文件,例如:中国国家互联网信息办公室等七部委联合公布的《生成式人工智能服务管理暂行办法》,中共中央网络安全和信息化委员会办公室发布的《全球人工智能治理倡议》,美国白宫发布的《关于安全、可靠和可信的AI行政命令》,欧洲议会、欧盟成员国和欧盟委员会达成的《人工智能法案》,28个国家和欧盟的政府代表共同签署的国际声明《布莱切利宣言》。 + +其中,部分文件内容中体现了对开源AI技术的推动和保护,例如:中国的《全球人工智能治理倡议》中指出,“鼓励全球共同推动人工智能健康发展,共享人工智能知识成果,开源人工智能技术。”欧盟《人工智能法案》在第2条“范围”中列明:此规定不适用于以自由和开源许可证提供的AI组件,除非它们作为高风险AI系统或是第二编(编者注:第二编为“禁止的AI实践”)或第四编(编者注:第四编为“透明度义务”)的AI系统的一部分由提供者投放市场或投入使用。此豁免不适用于根据第3条定义的基础模型(编者注:第3条定义的“基础模型”指的是一种AI系统模型,它在大规模的广泛数据上进行训练,旨在产生广泛的输出,并可以适应各种不同的任务)。 + +#### 3.2.3 全球开源组织应对AI治理新挑战 + +2023年6月,Open Source Initiative(OSI)发起了 定义“开源AI” 的倡议以及一系列线上和线下的全球讨论与活动,以应对开源AI治理的挑战。开源社参与了其邮件列表讨论并组织翻译了系列网络研讨会的内容。在已发布的《开源AI定义》的草稿文档中,主要包括序言、开源AI定义、评估许可证的清单三部分,其中对开源AI的定义主要涉及对使用、研究、修改、分享AI系统的授权。 + +2023年6月,Apache Software Foundation(ASF)法律委员会发布了《对贡献者的生成式AI指南》,旨在为使用AI生成的代码(通常情况下,也适用于文档、图片)进行ASF项目贡献的贡献者提供知识产权相关的指导。指南明确了对贡献中的AI生成的部分进行披露的义务,以及贡献AI生成的代码(全部或部分使用AI)应该满足的条件,例如生成式AI工具的条款和条件没有对输出结果的使用施加任何与开源定义(OSI - Open Source Definition)不一致的限制等,并给出了贡献者可以注明所使用的AI工具(可以通过“Generated-by:”标注)的推荐建议。 + +2023年12月,中国信息通信研究院发布了《可信开源大模型案例汇编(第一期)》,报告由中国信通院云计算开源产业联盟与HyperAI超神经共同编制,报告通过调研国内开源大模型的技术细节、应用场景、商业模式、应用治理、发展趋势等,关注开源大模型技术生态及产业链上下游,全面展现开源大模型及其工具链的发展全貌。此外,通过分析入选本次案例的开源大模型行业实践,为我国大模型产业发展提供路径参考。 + +#### 3.2.4 开源AI大模型呼唤新型许可证 + +开源正发展成为AI大模型的主流模式,但由于AI大模型不仅涉及软件代码和文档,还涉及数据、模型架构等,所以传统的开源许可证并不能完全满足AI大模型的需求,这引发了对新型开源许可证的讨论与探索。 + +OSI认为Meta 的 LLaMa 所使用的许可证并不符合开源许可证,因为其存在对某些用户的商业使用限制、某些使用目的的限制,而不满足OSI的“开源定义”中有关许可证不得歧视任何个人、团体或应用领域的要求。TII发布的Falcon-40B,因采用了含有“收入超过100万美元的任何商业应用都要支付10%的授权费”等特殊限制条款的自定义许可证(TII Falcon LLM License)并宣称是开源LLM而被质疑,之后将许可证改为了 Apache 2.0。Hugging Face将TGI的许可证从Apache 2.0改为了HFOIL(Hugging Face Optimized Inference License) ,同时承认该许可证由于存在对销售基于TGI构建的托管或托管服务的特殊限制而并非开源许可证。 + +截至2023年底,在Hugging Face(全球最大的大模型托管平台之一)上,有近50万的模型,其采用的许可证主要包括了传统的开源软件许可证、CC许可证、针对AI的新型许可证等,最受欢迎的三个许可证依次是Apache 2.0、MIT、OpenRAIL。其中,OpenRAIL许可证由RAIL(Responsible AI 
License)倡议组织提出的RAIL许可证升级而来,在借鉴传统开源软件许可证的基础上,同时考虑人工智能所带来的风险而采取了附带行为限制的许可,并且针对源代码、应用、模型、数据设置了相对应的不同许可证OpenRAIL-S、OpenRAIL-A、OpenRAIL-M、OpenRAIL-D。 + +国内相关标准和研究机构也在积极推进AI领域许可证的创新实践。2023年5月,中国信息通信研究院也联合产业各方编制发布了《纸鸢开放人工智能模型许可证 第1版》,对模型(及其衍生品、配套资料)的使用进行规范,不适用于模型的训练数据。2023年8月,上海交通大学智慧法院研究院与人工智能研究院、上海市白玉兰开源开放研究院共同举办木兰·白玉兰开放数据许可协议2.0框架设计专题研讨会,将许可协议功能定位为人工智能数据的开放许可。2023年12月,开放原子开源基金会联合木兰开源社区、OpenI启智社区等各界,共同研制的《木兰-启智模型许可证》(Beta版)在2023木兰开源大会发布,许可证适用于通过算法训练获得的模型及其补充材料,包括模型结构、参数、权重等,不包括训练模型的算法及算法源代码。 + +:::info 专家点评 +**卫剑钒**:这些争议相信很快就会不再存在,法律上只要明确了,类似的问题就都会迎刃而解。 +::: + +#### 3.2.5 我国开源领域标准建设正加速推进 + +国家发布政策明确支持开源领域标准建设。2023年8月,工业和信息化部联合科技部、国家能源局、国家标准委正式印发《新产业标准化领航工程实施方案(2023─2035年)》。其中,在新一代信息技术专栏的软件领域明确提出“研制开源术语、许可证、互联互通、项目成熟度、社区运营治理,以及开源软件供应链管理等标准”。 + +2023年4月,全国信息安全标准化技术委员会归口、中国信息通信研究院牵头的国家标准《信息安全技术 软件产品开源代码安全评价方法》形成标准征求意见稿并面向社会公开征求意见。 + +2023年7月,经中国电子工业标准化技术协会审核通过,由中国电子技术标准化研究院牵头的《T/CESA 1269-2023 信息技术 开源 术语与综述》、《T/CESA 1270.1-2023 信息技术 开源治理 第 1 部分:总体框架》、《T/CESA 1270.4-2023 信息技术 开源治理 第 4 部分:项目评估模型》等3项开源领域团体标准正式获批发布。 + +2023年9月,经中国电子工业标准化技术协会审核通过,由中国电子技术标准化研究院牵头的《T/CESA 1270.2-2023 信息技术 开源治理 第 2 部分:企业治理评估模型》、《T/CESA 1270.3-2023 信息技术 开源治理 第 3 部分:社区治理框架》、《T/CESA 1270.5-2023 信息技术 开源治理 第 5 部分:开源贡献者评估模型》、《T/CESA 1291-2023 信息技术 开源 元数据通用要求》等4项开源领域团体标准正式获批发布。 + +2023年10月,经中国通信标准化协会审核通过,由中国信息通信研究院牵头的《开源软件治理能力评价方法 第3部分:成熟度模型》、《开源软件治理能力评价方法 第 5 部分:治理工具和平台》等2项开源领域团体标准正式获批发布。 + +## 四. 
开源安全大事记 + +在数字化时代,软件无处不在,成为支撑社会正常运转的基本元素之一。然而,随着软件供应链的复杂性增加,安全问题也日益严重。Log4Shell 漏洞爆发后,开源安全问题备受关注。尽管 2022 年本应是“供应链安全元年”,但这个漏洞仍然普遍存在,修复版本采用率不高,软件供应链攻击频次急剧上升。开源代码的广泛应用使得供应链安全成为关乎生存的问题。Log4Shell 成为头条新闻,让人们认识到开源社区也存在安全风险。此外,开源生态系统中还存在其他被大量依赖的开源项目,其波及范围可能比 Log4Shell 更大,造成更严重的后果。供应链攻击呈急剧上升趋势,自 2019 年以来平均每年增长 742%。因此,我们需要更好地应对开源软件的安全问题。 + +### 4.1 最新趋势和挑战 + +关于开源安全的最新趋势和挑战的分析,主要包括以下几个方面: + +- **恶意软件服务化**:黑客利用开源代码和工具开发和分发恶意软件,形成了一个庞大的黑市,威胁着开源生态系统的安全。 +- **人为错误**:开发者和维护者在使用开源代码时,可能会犯一些低级错误,如忽略安全更新、使用弱密码、泄露敏感信息等,导致开源项目遭受攻击。 +- **供应链攻击**:攻击者通过篡改开源代码库、依赖包或更新渠道,向目标系统植入恶意代码,实施供应链攻击,影响开源项目的信任和可靠性。 +- 法律风险:开源项目在遵守许可协议、处理版权纠纷、应对政策变化等方面,可能会面临一些法律风险,需要及时识别和解决。 +- **安全标准**:开源社区和组织正在制定和推广一些安全标准和最佳实践,如 SLSA、OpenSSF、CII 等,以提高开源代码的质量和安全性。 +- **安全工具**:开源项目可以利用一些开源或商业的安全工具,如 Snyk、Dependabot、CodeQL 等,来检测和修复安全漏洞,提升安全防护能力。 +- **安全教育**:开源项目需要加强安全教育和培训,提高开发者和维护者的安全意识和技能,建立安全文化和流程,防范安全风险。 +- **安全合作**:开源项目需要加强与其他开源项目、组织、企业、政府等的安全合作,共享安全信息和资源,形成安全共同体,共同应对安全威胁。 +- **安全前景**:开源安全的前景是喜忧参半的,一方面,开源项目面临着日益复杂和严峻的安全挑战,另一方面,开源项目也拥有着强大和活跃的安全力量。 + +### 4.2 开源安全的法律责任 + +关于开源应负什么安全方面的法律责任的论战也方兴未艾。目前的主流论点与立法是,开源软件有漏洞,作者需要负责。虽然开源软件是免费提供的,但作者仍然应该尽力确保软件的质量和安全性。漏洞问题可能会导致用户信息泄露、系统遭受攻击等严重后果,因此作者有责任及时修复漏洞并通知用户。因此,目前全球立法的趋势是开源应负网络安全的法律责任。 +- 在**中国**,网络产品、服务的提供者不得设置恶意程序;发现其网络产品、服务存在安全缺陷、漏洞等风险时,应当立即采取补救措施,按照规定及时告知用户并向有关主管部门报告。网络产品、服务的提供者应当为其产品、服务持续提供安全维护;在规定或者当事人约定的期限内,不得终止提供安全维护。网络产品、服务具有收集用户信息功能的,其提供者应当向用户明示并取得同意;涉及用户个人信息的,还应当遵守相关法律、行政法规关于个人信息保护的规定。 +- **欧盟网络韧性法案 (CRA)** 旨在加强欧盟数字产品的网络安全,整合现有网络安全监管框架。该法案对包括软件在内的数字产品提出了大量网络安全要求。该法案与《高度共同网络安全指令》(NIS 2指令)《网络安全法》《人工智能法案》和《通用数据保护条例》(GDPR)等有着密切联系,并有可能成为最重要的欧盟网络安全法律之一。 + - CRA 法案适用于所有直接或间接连接到另一设备或网络的数字产品,其中,数字产品包括 “任何软件或硬件产品及其远程数据处理解决方案,包括单独投放市场的软件或硬件组件”。该法案将适用于这些产品从设计阶段到淘汰阶段的整个生命周期。 + - 该法案规定了经济运营者的一般义务,制造商的具体义务,进口商的具体义务,经销商的具体义务,主管机关和处罚措施。如果违反《网络弹性法案》附件I中规定的网络安全要求和制造商的义务,可能会被处以最高 1,500 万欧元或上一财政年度全球年营业额的 2.5% 的罚款。 + - 由于该法案的规定是针对数字产品的网络安全,因此开源开发者和贡献者需要确保其开源软件符合该法案的网络安全要求。如果开源软件不符合该法案的要求,则可能会被处以罚款或其他处罚。 + +### 4.3 2023 年一些重要的开源安全大事件 + +#### 4.3.1 Log4j 漏洞复活 + +Log4j 是一个广泛使用的 Java 
日志框架,2020 年 12 月被发现存在一个严重的远程代码执行漏洞,可以让攻击者通过发送恶意日志信息来控制服务器。这个漏洞影响了数百万的应用程序和设备,包括阿里巴巴、腾讯、京东、亚马逊、谷歌等知名企业。Log4j 的开发团队在发现漏洞后迅速发布了修复版本,但由于修复方案存在缺陷,导致漏洞仍然可以被绕过。因此,开发团队又陆续发布了多个更新版本,最终在 2021 年 1 月发布了 Log4j 2.15.0,宣布彻底修复了漏洞。 + +然而,近日有安全研究人员发现,Log4j 2.15.0 仍然存在一个新的远程代码执行漏洞,可以让攻击者通过发送特定的日志信息来触发 Java 序列化漏洞,从而执行任意代码。这个新漏洞被命名为 CVE-2021-44228,与之前的漏洞同样严重,甚至更难防范。为了应对这个新漏洞,Log4j 的开发团队又紧急发布了 Log4j 2.16.0,禁用了日志消息中的 Java 序列化功能。同时,安全专家建议用户尽快升级 Log4j 版本,关闭不必要的日志记录,使用防火墙和入侵检测系统来监控和阻止恶意流量。 + +#### 4.3.2 Linux 恶意软件增长率飚至 50% + +- **Linux 恶意软件数量激增**:根据 Atlas VPN 的数据分析,2022 年 Linux 恶意软件威胁数量增长了 50%,达到 190 万个,是近年来的最高水平。 +- **Linux 恶意软件的类型和目的**:Linux 恶意软件主要包括木马、僵尸网络、勒索软件、挖矿软件等,它们的目的是窃取数据、控制设备、敲诈勒索或利用计算资源。 +- **Linux 恶意软件的传播方式**:Linux 恶意软件通常通过网络服务、电子邮件、网页、移动设备等途径传播,利用系统漏洞、弱口令、社会工程等手段感染目标。 +- **Linux 恶意软件的防范措施**:建议 Linux 用户和管理员采取一些防范措施,如定期更新系统和软件、使用强密码和双因素认证、安装可靠的防病毒软件、避免打开可疑的链接和附件等。 + +#### 4.3.3 npm 供应链面临的新威胁:清单之惑 + +清单之惑是指 npm 注册没有根据 tarball 包的内容验证清单信息,导致攻击者可以利用这一漏洞来隐藏恶意代码或依赖项。这一漏洞的根源在于 npm API 要求维护人员在提交包的 PUT 请求中以及在随 tarball 包上传的 package.json 文件中都要提供有效的清单,但这两个清单可能不一致。 +- **清单之惑的影响和危害**:清单之惑可以让攻击者绕过 npm 的安全检查,将恶意代码或依赖项植入到受信任的包中,从而影响到数百万的 npm 用户和项目。这种攻击方式可以用来窃取敏感信息、执行远程命令、传播恶意软件等。 +- **清单之惑的解决方案和建议**:清单之惑目前还没有被 npm 官方修复,因此需要开发者和维护人员采取一些措施来防范这一漏洞。一些可能的解决方案和建议包括:使用 npm shrinkwrap 或 package-lock.json 来锁定依赖版本;使用 npm audit 或其他工具来检查包的安全性;避免使用不受信任的源或镜像来安装包;在发布包之前,检查清单信息是否与 tarball 包的内容一致。 + +#### 4.3.4 Electron 惊现十级漏洞! 
+ +- **Electron 漏洞的发现和影响**:Electron 是一个开源框架,用于构建跨平台的桌面应用程序。它被数百万的应用程序使用,包括微信、钉钉、VS Code 等。最近,Electron 被曝出一个十级漏洞,该漏洞允许攻击者通过发送恶意链接,远程执行任意代码。该漏洞已经被苹果和谷歌发现并披露,但由于缺乏关键信息,导致很多 Electron 应用程序未能及时修复,仍然面临风险。 +- **Electron 漏洞的原因和解决方案**:Electron 漏洞的根源在于它使用了过时的 Chromium 内核,而 Chromium 内核存在一个已知的漏洞,即 CVE-2023-4863。该漏洞利用了 Chrome 的沙箱逃逸机制,可以绕过浏览器的安全限制,执行任意代码。为了修复这个漏洞,Electron 需要升级到最新的 Chromium 内核,或者禁用沙箱功能。Electron 团队已经发布了新的版本,修复了这个漏洞,建议所有的 Electron 应用程序开发者尽快更新。 +- **Electron 漏洞的启示和建议**:Electron 漏洞暴露了开源软件的安全风险,以及漏洞披露的不足。开源软件虽然有很多优势,但也需要及时更新和维护,避免使用过时的依赖库。漏洞披露也需要更加透明和及时,提供足够的信息,让相关的开发者能够快速响应和修复。此外,开发者也需要加强自己的安全意识,使用安全的编码规范,检查和测试自己的代码,防止潜在的漏洞和攻击。 + +#### 4.3.5 解决 2,900 个漏洞,Google 奖励 1,200 万美元 + +* 谷歌的漏洞奖励计划(Vulnerability Reward Program,简称VRP)旨在提高其产品和服务的安全性,向发现漏洞的安全研究员支付奖金。 +* 2022年,谷歌通过 VRP 共支付了 1,200 万美元,修复了 2,900 多个安全问题。其中,最高单笔奖励达到 60.5 万美元。这些安全研究员来自 68 个不同国家/地区,共有 703 人获得奖励。 +* 此外,谷歌还将扩大 VRP 的适用范围,包括最新版本的 Google Nest(智能家居设备)和 Fitbit(可穿戴设备)。 + +#### 4.3.6 GitHub 增加 SBOM 导出功能,使其更易于符合安全性需求 + +- **GitHub 增加 SBOM 导出功能**:GitHub 宣布推出一项新功能,让开发者可以轻松地生成和导出软件构建材料清单(SBOM),以提高软件供应链的安全性和透明度。 +- **SBOM 的作用和标准**:SBOM 是一种描述软件组件和依赖关系的文档,可以帮助开发者、审计者和维护者了解软件的来源、结构和漏洞。目前,有多种不同的 SBOM 格式和标准,如 SPDX、CycloneDX 和 SWID。 +- **GitHub 的 SBOM 生成和导出流程**:GitHub 利用其 CodeQL 和 Dependabot 等工具,可以自动分析代码库中的语言、框架和依赖项,并生成相应的 SBOM。开发者可以在 GitHub 的安全标签下找到 SBOM,并选择导出为 SPDX 或 CycloneDX 格式的文件。 +- **GitHub 的 SBOM 与行业合作**:GitHub 表示,其 SBOM 功能是与开源社区和行业组织合作开发的,旨在支持软件安全的最佳实践和政策。GitHub 还加入了美国商务部的软件构建材料清单倡议,以推动 SBOM 的普及和标准化。 + +#### 4.3.7 OpenAI、谷歌微软等设立一千万美元 AI 安全基金 + +OpenAI、谷歌微软等多家科技公司和研究机构联合设立了一个价值一千万美元的基金,用于支持和奖励有关 AI 安全和道德的研究项目。 +- 其目标是促进 AI 的负责任和可信赖的发展,防止 AI 造成潜在的风险和伤害,例如侵犯隐私、歧视、误导、操纵等。 +- 基金的发起方表示,这是一个重要的举措,旨在推动 AI 领域的多样性、透明度和协作,提高 AI 的社会效益和公平性,保护人类的价值和权利。基金也希望吸引更多的研究者和机构参与到 AI 安全和道德的探索中,共同为 AI 的未来做出贡献。 +- 基金将由一个独立的委员会管理,委员会由来自不同领域和背景的专家组成,包括 AI 研究者、社会科学家、伦理学家、法律学者等。基金将定期发布公开征集研究提案的通知,根据评审标准和流程选择最优秀的项目进行资助。 + +综上所述,开源软件需要更好的安全风险治理机制,包括质量标准、安全审计、漏洞奖励、责任分担等。同时,开源软件也需要更多的支持和投入,包括资金、人力、社区等。开源软件的未来取决于我们如何应对当前的危机,以及如何建立更加可持续和安全的开源生态系统。 + 
+## 五. 开源商业大事记 + + + +### 5.1 早期阶段融资事件 + +- **开源数据库管理工具 DBeaver 获得 600 万美金天使轮融资** + +DBeaver 2013 年开源,其基于 Java 开发,可以运行在各种操作系统之上,是一个免费开源的通用数据库管理和开发工具。其创始人在 2017 年成立商业化公司来提供企业级支持,并研发企业版本。目前 DBeaver 已经拥有 800 万用户,超 5,000 的付费客户,包括 IBM、 Samsung 和 Moody’s。 + +- **开源大模型公司 Together 获得 2000 万美金融资** + +开源大模型初创公司 Together 期望「通过提供跨计算和一流基础模型的开放生态系统,引领 AI 的 Linux 时刻」,其获得了 2000 万美金种子轮融资。Together 正在构建一个用于运行、训练和微调开源模型的云平台。Together 的首批项目之一 RedPajama 旨在培育一套开源生成模型,目前已经开源了 1.2 万亿 token 训练数据集。Together 的开源平台允许商业化。 + +- **开源 AI 和数据流编排平台 Union AI 获得 1910 万美金的 A 轮融资** + +Union AI 提供 Flyte 托管服务(编排 ETL、机器学习工作流),还构建了 Pandera(数据测试框架)和 Union ML(位于 Flyte 之上的框架,可帮助团队使用现有工具集构建和部署模型),并在今年推出了 Union Cloud,获得了由 NEA 投资的 1910 万美金 A 轮融资。 + +- **开源 DB for AI 公司 MindsDB 获得 2500 万美金的种子轮融资** + +MindsDB 定位于 DB For AI 场景,其通过 AI-Table 的方式,将机器学习模型变为数据库中的虚拟表,连接数据和模型,使得用户可以直接在数据库中建模,省去了数据处理、搭建机器学习模型等繁琐的步骤,加速 AI 应用的落地。MindsDB 在 2023 年连续获得多轮融资,总金额近 5000 万美金。 + +- **明星开源 LLM 公司 Mistral AI 获多轮融资,跻身独角兽行列** + +由 Meta 和谷歌的科学家成立的 Mistral AI,近期发布了开源 MoE 大模型 Mixtral 8X7B,引发巨大关注。Mistral AI 去年也完成多轮融资,在最近的 A 轮融资中获得 4.15 亿美元,目前估值超过 20 亿美金。 + +- **模型持续测试验证工具 Deepchecks 获得 1400 万美金天使轮融资** + +以色列公司 Deepchecks 定位于 ML 持续测试验证领域,其允许客户重用和自定义组件以全面测试 ML 模型和数据集。Deepchecks 在 2020 年推出了开源版 ML 测试工具,并在今年初推出商业版 Deepchecks Hub。 + +截至目前,开源产品 Deepchecks 已被下载超过 500000 次,其用户包括 AWS、Booking.com 和 Wix 等。近期 Deepchecks 宣布了 1400 万美金天使轮融资。 + +- **开源组件供应链安全平台 Endor Labs 获得 7000 万美金 A 轮融资** + +Endor 定位于帮助企业监控其开发管道的安全状况,包括可触及和可利用的风险,管理开发人员对代码的访问,并密切关注硬编码在其代码库中的秘密。近期他们获得了由 Lightspeed Venture Partners 领投的 7000 万美元 A 轮融资。 + +- **AutoGPT 完成 1200 万美元融资** + +AutoGPT 采用 GPT-4 和 GPT-3.5 等语言模型,构建多功能智能体,可独立执行任务并不断提高性能。项目上线五十多天,拥有 13.1 万 star,2.67 万 fork,是 GitHub 历史上增长速度最快的项目之一。 + +### 5.2 中后期阶段融资事件 + +- **英国 MLOps 公司 Seldon 获得 2000 万美金 B 轮融资** + +Seldon 成立于 2014 年,致力于解决 AI Model 在生产环节的部署、监控、管理和可解释性问题。2020 年 A 轮融资至今,Seldon 的开源产品安装量获得了 YoY 400% 的增长。 + +- **Temporal 获得 7500 万美金融资** + +基于 Uber 开源的分布式任务编排和调度引擎 Cadence 创立的新公司 Temporal 获得 7500 万美金新一轮融资,投前估值达 14 亿美金。 + +- 
**SAST/SCA 开源开发安全厂商 Semgrep 获得 C 轮融资** + +Semgrep 从 SAST 领域切入,其推出了 SAST 引擎,用户可以将其与自己的 CICD 流程以及 Github,Gitlab 等代码托管平台相集成,通过 Semgrep 内置以及自定义的规则进行代码检测。Semgrep 在 2020 年将产品开源,目前已经拥有超 200 万用户,其 2022 年收入相比于 2021 年实现了 7.5 倍增长。 + +- **法国 AI 研究实验室 Kyutai 获 3.3 亿美元投资,致力于所有成果开源** + +法国亿万富翁及 Iliad 的 CEO Xavier Niel 在巴黎创办了一个名为 Kyutai 的 AI 研究实验室。这是一个由私人资金支持的非营利机构,专注于人工通用智能领域的研究。实验室目前已筹得近 3 亿欧元资金,Kyutai 专注于基础 AI 模型研究,并得到了 Scaleway 提供的 Nvidia H100 GPU 顶级计算资源支持。 + +- **开源平台 Replicate 获得 4000 万美金的 B 轮融资** + +开源机器学习模型平台的企业 Replicate ,近日宣布成功完成由 Andreessen Horowitz 领投的 B 轮融资,融资总额为 4000 万美金,将继续加强开源机器学习模型平台。 + + +### 5.3 收并购事件 + +- **AMD 收购开源 AI 软件 Nod.ai** + +AMD 在官网宣布签署最终协议收购 Nod.ai,Nod.ai 将加速在 AMD 高性能平台上部署优化的人工智能解决方案,并强化 AMD 开源软件战略。 + +- **Snowflake 拟收购 Ponder 强化数据云 Python 能力** + +Ponder 是一家将流行数据科学库与数据所在位置连接起来的领先公司,并维护了广泛使用的开源库 Modin,用于可扩展的 Pandas 操作。为了更好地服务 Python 数据从业者,Snowflake 宣布了对 Ponder 的收购意向。 + +- **思科宣布计划收购云原生网络安全初创公司 Isovalent** + +Isovalent 致力于开发 eBPF 和 Cilium 两项关键开源技术,这些技术能深入洞察操作系统和云原生应用。Isovalent 在云原生计算基金会(CNCF)和 eBPF 基金会中扮演重要角色。为保持这些开源项目的活跃发展,社区的持续支持至关重要。 + + +## 六. 
开源教育大事记 + +今年,中国开源年度报告的大事记中特别增加了一项“开源教育”,关于开源教育的定义,其实在不同的组织类型中会有一些差别,在本篇章中,我们希望把开源教育定义为:**采用开源软件和开放的教育资源来支持教育目标。这包括使用开源软件工具、教材和教学资源,以及促进知识共享和协作。开源教育的目标之一是提供更加平等和开放的教育机会,使更多人能够访问高质量的教育资源。** + +在开源教育的模式下,教育资源如教案、课程内容、软件工具等都是开放获取的,任何人都可以使用、修改和分享。这种模式有助于培养学生的创新思维、协作能力和实际解决问题的能力。学生通过参与开源项目,可以接触到业界最新的技术和工具,了解软件开发的实际流程,同时也能为开源社区贡献自己的力量。 + +同时,作为报告起草方,“开源教育”对开源社来说其实并不陌生,从 2014 年成立之初,开源社就积极探索开源与教育的结合点,在正式介绍 2023 年的开源教育大事记之前,我们先来回顾一下开源社在开源教育领域所做的工作: +- 2014 年,开源社发起了中国第一批开源进校园系列活动——“开源者行”; +- 2017 年,开源社执委会下设立了开源教育组、高校合作组等专注开源教育的工作小组; +- 2018 年,开源社举办的第三届中国开源年会(COSCon'18) 中出品了中国第一个“开源教育分论坛”; +- 2019 年,开源社与华东师范大学共同设立了中国第一支“开源教育基金”; +- 2020 年,开源社录制了《开源特训营》系列节目,旨在开展开源教育入门培训工作; +- 2021 年,开源社在第六届中国开源年会(COSCon'21)中邀请了六位嘉宾分享开源教育,第一次邀请了高校开源学生分享开源教育相关议题; +- 2022 年,开源社积极开始探索开源教育培训相关方向,比如企业开源专项培训等; +- 2023 年,开源社在第八届中国开源年会(COSCon'23)中首次开设了“青年开源教育”分论坛,第一次邀请了小学、中学的青年学生分享开源相关观点。 + +从上述开源社在“开源教育”上的工作发展历程来看,开源和教育的结合已经越来越深入,尤其是开源教育的受众,已经逐渐从开源组织到高等院校、再到中小学,再到更广阔的已就业职后群体。 + +然而,无论是在国内还是国外,符合标准的开源人才依然短缺,Linux 基金会发布的《The 10th Annual Open Source Jobs Report》显示,绝大多数雇主 (93%) 表示难以找到足够具有开源技能的专业人才,且情况没有缓和,有将近一半 (46%) 的雇主计划在未来六个月内增加对开源人才的招聘,73% 的开源专业人士表示,他们很容易找到新的工作、继续开展开源事业。 + +开源人才的紧缺,让开源教育在全球范围内越来越受到重视,中国也在积极推动开源教育的发展,通过参与开源社区活动、号召贡献开源项目、建立开源教育体系、制定开源人才能力评估标准等措施,来促进开源生态的繁荣和人才培养。通过这些举措,可以让在校学生和职后人群在学习过程中更加深入地理解开源软件的理念,促进理论与实践的结合,提高教育的质量,满足社会对创新型人才的需求。下面就让我们回顾一下 2023 年的中国开源教育大事记: + +### 6.1 开源教育实践活动逐渐丰富,项目课题制+创新竞赛制成为主流 +2023 年,中国的开源教育实践活动显著增长,吸引了大量学生参与,并得到了多家知名机构的支持。以下是一些主要的实践活动: + +- **开源之夏(OSPP)**:这是由中科院软件所指导的暑期活动,旨在鼓励学生参与开源软件开发。2023 年,来自 592 所高校的 3,475 名学生报名,504 名学生成功入选,贡献了 1,236 个 PR。 +- **GitLink 确实开源编程夏令营(GLCC)**:由中国计算机学会主办,2023 年有来自 139 所高校的 341 名学生参与,最终 80 个课题通过中期考核。 +- **第六届中国软件开源创新大赛**:在自然科学基金委信息科学部的指导下,由 CCF 主办,聚焦于“卡脖子”软件领域和前沿技术,设有多个赛道。 +- **第十二届 “麒麟杯” 全国开源应用软件开发大赛**:由中国软件行业协会、开放原子开源基金会、中国计算机学会开源发展委员会、中国开源软件推进联盟指导,吸引了来自 60+ 高校的 345 组队伍报名,20 支队伍晋级决赛。 +- **2023 开放原子开源大赛**:由工业和信息化部、江苏省人民政府、湖南省人民政府主办,旨在联合开源组织、企事业单位、高等院校、科研院所、行业组织、投融资机构等多方资源,充分发挥产业链生态上下游的协同能力,基于开源共享、共建共治的原则共同举办。 + +-
**首届中国研究生操作系统开源创新大赛**:由中国研究生创新实践系列大赛主办,专注于操作系统领域的开源创新。 + +此外,还有针对企业的 **2023 开源和信息消费大赛——第四届工业 APP 和信息消费大赛**,由工信部等机构主办,有助于推广开源教育至职场人群。 + +这些活动不仅提高了学生们的技术能力,还促进了开源文化的传播和开源社区的活跃度,为中国开源生态的发展做出了重要贡献。 + +### 6.2 本土开源教育理论支撑开始丰富,开源硬件融入基础教育 +2023 年,中国的开源教育领域不仅在实践层面取得了显著进展,理论基础也日益丰富。高校教师和开源专家开始更加重视开源教育理论的研究,并在不同教学层次和方向上发表了具有代表性的文章。这些研究为开源教育提供了案例和理论分析,展示了开源教育在高等教育和 K12 教育中的应用潜力。 + +**在高等教育阶段**:开源教育被视为一种创新的教学模式,有助于学生学习软硬件开发技能。例如,北京大学、华东师范大学、上海对外经贸大学等高校的教师研究了开源教育在各自学科教学中的应用和价值。 + +**在 K12 教育阶段**:开源教育通常与 STEM、STEAM、机器人/无人机教育、创客教育相结合,尤其是通过开源硬件融入教学。例如,珠海市梅华中学、南京市琅琊路小学等教育机构的教师探索了开源硬件在项目式教学中的应用。 + +此外,上海市教育委员会教育技术装备中心召开了教育无人机、开源硬件课程资源开发交流研讨会,展示了开源硬件在中小学教育中的应用。第十一届中小学 STEAM 教育大会中的开源机器人运动会也展示了开源教育在中小学科创技术教育中的案例和新趋势。 + +这些活动和研究表明,未来开源教育在高等教育和 K12 教育中的推广将有所不同,但都将趋向于开源通识基础教育和开源软硬件开发教育的发展。开源教育不仅有助于提升学生的技术能力,还能促进创新思维和团队合作精神的培养,为中国教育体系的多元化发展做出贡献。 + +探索开源教育和高等教育结合的文章有: +- 北京大学软件与微电子学院的荆琦、冯惠《产教融合下的双轨制开源教学模式探索——以北京大学“开源软件开发基础及实践”课程为例》 +- 华东师范大学数据科学与工程学院王伟、同济大学赵生宇《从开源科技的数字化洞察看开源教育的未来》 +- 上海对外经贸大学张国锋《高校引入开源教育的价值和意义》 +- 长沙理工大学计算机与通信工程学院的黄浩炜《融合开源软件思想与实例的软件工程课程教学研究》 +- 新乡医学院三全学院智能医学工程学院的王晓娜、丁丹、班戈《高校开源教育视角下基于区块链的新医科数学课程群建设》 +- 南京邮电大学陶卓、王凯、葛伟《开源生态培育下的创新型软件人才培养》 + +探索开源教育和 K12 教育结合的文章有: +- 广东省珠海市梅华中学的索芳《STEM 教育理念下的开源硬件项目式教学实践探究——以<创意发光服饰>为例》 +- 南京市琅琊路小学的戚韵东《基于开源架构项目研究共同体推广小主人教育》 +- 朱立新、张香玲、姚自明等人在《教育与装备研究》期刊上发表的《信息技术教育开源硬件芯片研究》 +- 上海市复兴高级中学奚骏:《创客与开源硬件为教育注入新活力》 + +### 6.3 开源教育论坛兴起,开源+教育圈越扩越大 +2023 年,开源教育在中国的发展呈现出明显的上升趋势,这一点从数量增加、频率提高、质量提升的专门讨论开源教育的大会中可以看出。这些大会不仅展示了开源教育的影响力,还促进了教育界与开源社区的深度交流与合作。 + +一些知名的大会和论坛包括: +- **2023 GAIDC 全球开发者先锋大会**:在这个国际性的开发者大会上,开源技术论坛展示了开源在全球范围内的应用和发展。 +- **第二届中国开源教育研讨会(SOSEC-2)和第三届中国开源教育研讨会(SOSEC-3)**:分别在广州和上海召开,聚焦于中国开源教育的发展现状和未来趋势。 +- **全国高校新商科开源创新教育研讨会**:在上海举行,探讨了开源在教育领域的应用,特别是在商科教育中的融合。 +- **第四届中国计算机教育大会**:首届计算机开源教育论坛作为其中的一部分,强调了开源在计算机教育中的重要性。 +- **2023 中关村论坛——世界开源创新发展论坛**:以“开放科学背景下的开源教育”为主题,讨论了开源教育在科学研究中的作用。 +- **GOTC 2023**:召开Linux 基金会的开源教育及人才培养峰会,突出了开源技术在人才培养中的关键角色。 +- **2023 开放原子全球开源峰会**:开源教育与人才分论坛的成功召开,进一步推动了开源教育在全球范围内的讨论和实践。 +- **COSCon'23 
第八届中国开源年会**:开设“青年开源教育”分论坛,邀请了OpenTeen中小学的青年学生分享开源实践的相关体验。 + +这些活动和论坛的举办,不仅提高了开源教育在学术界和产业界的影响力,还为教育工作者、学生、开源社区成员提供了一个交流的平台,促进了开源教育资源的共享和最佳实践的传播。随着开源教育论坛的兴起,开源与教育的结合正逐渐成为教育创新和人才培养的新趋势。 + +### 6.4 开源人才培养与认证逐渐成为标准体系 +2023 年,中国开源教育领域迎来了一个重要的发展里程碑,即《开源人才能力要求与评价规范》的启动编制。这一标准的制定,由工业和信息化部人才交流中心联合开放原子开源基金会牵头,来自北京航空航天大学、北京理工大学、华东师范大学、华为、百度、腾讯、小米等校企单位的 36 位参编专家参加了会议,这标志着开源人才教育正式进入国家人才战略培养体系的一部分。该标准的制定对于中国开源人才发展生态圈的建设具有重要意义,它将有助于推动开源软件和技术的高质量发展,通过调研分析和归纳提炼,建立一套科学规范、行业认可的人才能力要求标准。 + +此外,开源师资培训也成为了一个重要的探索方向。例如,长沙市软件和信息技术服务业促进会举办的 2023 湖南省高校 OpenHarmony 师资培训活动,旨在加深高校教师对 OpenHarmony 的应用与理解,提高基于 OpenHarmony 开发技术和授课能力,构建良好的教育信创生态。 + +这些举措和动态表明,中国正在积极构建开源人才培养与认证的标准体系,这不仅有助于提升开源人才的专业能力,还将促进开源技术在教育领域的广泛应用和创新发展。随着开源教育体系的不断完善,可以预期未来将有更多高质量的开源人才涌现,为中国乃至全球的开源社区贡献力量。 + +### 6.5 企业参与开源教育,新型产学研合作模式兴起 +2023 年,中国企业在开源教育领域的参与度显著提高,与高校的合作模式也更加开放和深入。这些合作通常涉及将实际的开源项目引入教育环境,让学生能够参与到高质量的开源项目中,而不是仅仅进行一些基础的操作性工作。以下是一些典型的企业与高校合作案例: + +- **Answer 项目**:被选为北大光华 MBA 的整合实践项目,允许学生参与实际的开源项目。 +- **CloudWeGo 项目**:该项目被纳入北大的研究生课程,让学生能够参与到企业的开源项目中;该项目还与南京大学和浙江大学合作,推动校园合作和开源人才培养。 +- **openKylin**:在天津科技大学成立高校站,专注于开源人才的培养。 +- **平凯星辰(PingCAP)**:由 PingCAP 捐赠三年的CCF 中国数据库暑期学校合作,提供工程实践全部实验;与华东师范大学签订联合博士培养合作协议,旨在促进关键软件高层次人才培养。 +- **OceanBase**:与华东师范大学合作,应对技术挑战,引领分布式数据库科研创新和开源人才培养。 +- **StoneDB**:完成第一届实习生实训,吸纳多所知名高校学生参与,专注开源数据库人才的培养。 +- **腾讯**:通过“开放原子校源行”项目支持开源人才培养,同时启动了 2023 年度犀牛鸟开源人才计划,助力高校开源人才培养。 +- **深开鸿**:与北京理工大学联合举办开源鸿蒙人才培养工作研讨会,并与多所学校合作开设“开源鸿蒙英才班”。 +- **拓维信息**:旗下开鸿智谷参与的雅礼麓谷中学项目入选“2023 年度智慧教育优秀案例”。 +- **中软国际**:与北京理工大学、深开鸿合作,在信息技术创新学院开设首个“开源鸿蒙英才班”。 +- **深开鸿**:与东南大学合作培育高校开源人才,助力 OpenHarmony 人才生态发展。 +- **鸿湖万联**:联合多所学校和公司成立全国 OpenHarmony(开源鸿蒙)智能终端与物联行业产教融合共同体。 + +这些合作模式不仅为学生提供了参与实际开源项目的机会,还促进了企业与高校之间的知识和技术交流。通过这些合作,企业能够更好地了解学生的能力和需求,同时高校学生也能够获得与企业专家直接合作的机会,这对于提升学生的技术能力和职业素养都是非常有价值的。此外,这些合作还有助于推动开源技术的发展和普及,以及为开源社区贡献更多的创新成果。 + +### 6.6 高校开源教育课程体系日益完善,参与开源积极性提高 + +2023 
年,中国高校在开源教育方面的努力日益显著,许多高校通过开设特色课程、建立联盟、与企业合作等方式,积极推进开源教育的发展。清华大学、北京航空航天大学、浙江大学、上海交通大学、华东师范大学等全国近百所大学宣布将在未来三年内陆续开设开源软件课程,课程包括开源专业技术、数字公共产品等基础专业课程,帮助学生从零开始、由浅入深地理解开源知识架构,加快软件关键领域的人才培养。以下列举一些具体的例子: + +- **北京大学**: + - 与头歌和 GitLink 合作,打造了《OSS Development 开源软件技术》线上实践课程,结合理论与实践,培养学生的开源软件开发技能。 +- **清华大学**: + - 举办 2023 秋冬季开源操作系统训练营,通过使用 Rust 语言编写操作系统的实践,培养学生的操作系统开发技能。 +- **华东师范大学**: + - 推出了《OSS101 开源软件通识》课程,旨在培养学生的开源意识和技能。 + - 牵头成立 CCF 信息系统专委会开源教育工作组,并创立了“社-课-赛-证”一体化开源人才培养体系,以推动开源教育的发展。 +- **南方科技大学**: + - 在启智开发者大会上参与成立开源高校联盟,致力于推动粤港澳大湾区开源生态建设和高校人才培养,辐射全国。 +- **北京理工大学**: + - 与深开鸿合作,举办了开源鸿蒙人才培养和科研合作工作研讨会,加强了校企合作,提升了人才培养质量。 + +这些活动和课程的推出,不仅丰富了高校开源教育的课程体系,也提高了学生参与开源的积极性。通过这些实践,学生能够更好地理解开源软件的开发过程,掌握相关的技能,并参与到开源社区中。这些举措对于培养适应现代数字经济发展需要的高素质开源人才,以及推动开源技术在中国的普及和应用都具有重要意义。 + +### 6.7 各方推进 “开源进校园” 活动,吸引学生关注 +2023 年,要说在开源教育上感知最明显的活动,莫过于各个组织推行的“开源进校园”活动,其中开放原子开源基金会、CCF 开源发展委员会、开源之夏组委会、红山开源举办了较多的校园行活动。 + +- **开放原子开源基金会** + - “开放原子校源行”公益项目由开放原子开源基金会与腾讯公司共同出资设立发起。双方携手通过建立高校开源社团、普及开源文化、研发开源课程体系等方式探索产教融合新路径。 +- **CCF 开源发展委员会** + - 中国计算机学会开源发展委员会发起的“开源高校行”系列活动,在清华大学、北京大学、北航、复旦等名校成功举办,形成了广泛影响和成功实践。 +- **开源之夏组委会** + - 为了让更多学生深入了解开源、参与开源,开源之夏活动携手众多优秀开源社区,开启“开源之夏校园行”之旅。开源之夏校园行系列活动旨在进一步激发新生代开发者群体的能量与活力,让更多学生深入了解国内外知名的开源技术、项目及社区,让开源文化普及到更多高校。 +- **红山开源** + - 红山开源社区面向重点高校和重点方向推出“红山开源高校行”活动,提升社区的影响力和知名度,吸引更多优秀创新资源参与开源创造生态构建。 + +此类活动,预计在未来会成为高等院校学子接触开源教育的主流渠道之一。 + +### 6.8 开源教育政策相关 + +2023 年,尽管中国开源教育领域在实践方面取得了显著进展,但在政策层面,开源教育相关的支持性政策相对较少。 + +不过,一些地方政府已经开始关注并推动开源教育的发展。例如,2022 年 12 月 29 日,长风联盟智库基地提议了《关于加强北京市开源人才教育的建议》。该建议对开源教育人才现状和瓶颈进行了系统介绍,并提议北京市政府加强开源人才教育培训工作。作为中国的开源生态高地,北京在推进开源人才教育方面具有重要作用,这对培养适应产业需求的软件人才、建立可持续发展的开源生态、提升软件科技源头创新和技术供给能力、实现向创新链高端跃升具有重要意义。 + +此外,2020 年教育部和工业和信息化部联合发布的《特色化示范性软件学院建设指南(试行)》通知,也对高校投入开源教育产生了积极的推动作用。该指南强调了软件人才培养的特色化建设,探索专业建设规律,围绕关键基础软件、大型工业软件、行业应用软件、新兴平台软件和嵌入式软件对人才的特色化需求,加强先进软件架构、工程方法和算法模型教育。同时,该指南也鼓励**积极培育重点开源项目,汇聚优秀开源人才**,并推动工业技术软件化,对产业创新形成有力支撑。 + +尽管 2023 年开源教育相关的政策性消息并未广泛传播(有可能在制定中),但我们已经看到已有政策文件对开源教育产生了积极影响。展望 2024 年,期待国家相关部委会出台更多开源教育相关的政策,以进一步规范和推动开源教育的实践,促进开源教育在中国的发展。 +
+ + +## 七. 开源榜单与报告汇总 + +现在,不仅仅是开源社每年会发布《中国开源年度报告》,其他的媒体、组织与机构,也会发布各种类型的开源相关榜单、报告、蓝皮书等等。为了方便读者们掌握一个全貌,我们这里做了一个简单的汇总与整理。 + +### 7.1 一些有价值的报告 + +- 2023 年 2 月,由开源社组织出品的《**2022 中国开源年度报告**》发布,报告主要由四部分组成,分别是大事记篇、数据篇、商业化篇和问卷篇。大事记篇分为开源商业、开源安全、开源技术、开源法律、开源社区与生态五个部分;数据篇由 X-lab 开放实验室、Apache DevLake 社区和 Gitee 联合制作,分为 GitHub 篇和 Gitee 篇呈现;商业化篇由云启资本投资团队撰写,重点关注了开源软件全球化市场的推进;问卷篇结合数据分析手段和调查报告等多种形式,从开源社区中所处角色的视角进行调查,并加入了开源社区度量、开源商业化相关话题。 +- 2023 年 4 月,InfoQ 研究中心发布《**中国开源生态图谱 2023**》,以中国开源项目名录和图谱的形式,为中国开源领域提供便捷易用的工具,让国内开发者、企业、研究院、基金会等开源生态了解中国开源的项目现状,并为中国开源产品添砖加瓦。图谱内共计收录了 931 个中国开源项目,涵盖七大细分领域和生态机构,其中七大细分领域分别为操作系统、数据库、人工智能、云原生、大数据、前端、中间件,生态机构包括实验室/研究院、开源基金会、开源产业联盟、开发者社区和代码托管平台。 +- 2023 年 6 月,由中国开源推进联盟(COPU)牵头,联合中国开发者社区 CSDN、中国科学院软件研究所、开放原子开源基金会、北京开源创新委员会、开源社、开源中国、北京大学、华东师范大学、国防科技大学等 106 家单位,以及 120 多位开源专家和志愿者,携手重磅发布《**2023 中国开源发展蓝皮书**》,力图呈现 2023 年中国开源产业生态全貌、中国开源在技术创新、产业发展方面的真实图谱。 +- 2023 年 12 月,由开源中国与 Gitee 共同撰写的《**2023 中国开源开发者报告**》正式发布。报告共分为三个部分,包括:开源开发者事件回顾、2023 LLM 技术报告、Insight:中国开发者开源新动向。 +- 2023 年 12 月,艾瑞咨询发布《**2023 年中国基础软件开源产业研究白皮书**》。白皮书研究了我国开源软件的发展路径,通过对比讨论国内外开源软件产业演进的发展经验,总结开源软件产业链和产业主体,分析开源软件的商业模式及商业价值,扫描产业中开源项目及各方参与者的主要特征,为读者呈现扎根于我国的开源产业生态图景。 +- 中国信息通信研究院可信开源团队长期开展开源相关研究工作,在 2023 年发布了**系列可信开源报告**,包括《2023年中国企业开源治理全景观察》、《开源知识产权案例集(著作权篇)》、《数字公共产品洞察报告》、《OSPO案例汇编(第二期)》,以及针对前端、数据库、通信等细分行业的开源技术研究报告或案例集。 + +### 7.2 严肃的,值得参考的榜单 + +- **2023 “科创中国” 开源创新榜**:这是由中国科协科学技术传播中心、中国计算机学会、中国通信学会、中国科学院软件研究所共同主办,CSDN 承办的一个评选活动,来自全国学会、大学、科研院所、企业、开源基金会、行业联盟等二十多位开源专家共同参与了本届榜单评审工作,还是相当严肃与严谨的。 +- **中国开源码力榜**:这是由思否、开源社、X-lab 实验室联合发起的一个完全根据 OpenRank 算法,计算得出的一个全新的、专属于开源开发者的榜单。每年会根据贡献度排序,选出 99 位来自中国的开发者。 +- **开源指南针 OSS Compass**:2023 年 2 月发布,是一个用于开源生态健康评估的平台(https://oss-compass.org),面向 GitHub、Gitee 等平台所有开源项目开放。平台由国家工业信息安全发展研究中心、开源中国、南京大学、华为、北京大学、新一代人工智能开源开放平台(OpenI)、百度、腾讯开源联合发起并协作开发,同时平台本身是一个开源项目,围绕该项目形成了开源开放的社区。平台构建了一个包括生产力、稳健性、创新力三个维度,涵盖 14 个指标模型在内的开源生态评估体系。 +- **阿里巴巴开源开发者贡献榜**:这是另一个基于 OpenRank 的贡献者榜单,为了深入理解这一算法和榜单机制对开源社区产生的影响,X-lab
实验室的两位博士生,赵生宇和夏小雅加入阿里巴巴开源办公室,并展开了详尽的研究工作。他们不仅从数据统计的角度出发,分析了榜单上线前后社区项目统计指标上的变化情况,还进行了一系列深度访谈,听取了开发者们的真实声音。这一研究的成果不仅为开源社区的发展提供了宝贵的经验和启示,还被收录在软件工程领域的国际顶级学术会议 ICSE 2024 中,引起了学术界的高度关注。 + +### 7.3 不妨一看的榜单 + +- **中国开源先锋 33 人**:思否与开源社合办,一个完全基于偏好的榜单,每一年由之前历年的开源先锋推荐,出发点很简单:“我想给大家介绍这位朋友、开源人”。然后经过投票,选择的原则也很简单:“我很想认识这位朋友、开源人,也希望更多人能够认识这位朋友。”所以,这个不妨一看的榜单,也希望能够让大家认识更多这样的开源先锋。 +- **OSC 中国开源项目评选及系列榜单**:在 2021 年与 2022 年,开源中国都发起了一系列的评选活动,包括:中国开源项目社区健康案例、最火热中国开源项目社区、优秀国际开源项目中文社区等类型,但是不知为何,在 2023 年没有继续评选了。 + +### 7.4 荒唐的榜单 + +- **开源贡献者排行榜**:有一个号称是 “国际测试委员会 BenchCouncil” 的国际组织,经过了一套号称是科学公正的评分流程,得出了一个号称是 “世界首个开源贡献榜”,在这个榜单中,Linux 操作系统内核的创始人 Linus,仅仅排名第 12 名。这个榜单的荒唐之处,也就可想而知了。 + + diff --git a/package.json b/package.json new file mode 100644 index 0000000..a477210 --- /dev/null +++ b/package.json @@ -0,0 +1,12 @@ +{ + "private": true, + "scripts": { + "dev": "vitepress dev", + "build": "vitepress build", + "preview": "vitepress preview" + }, + "devDependencies": { + "@types/node": "^20.11.20", + "vitepress": "1.0.0-rc.36" + } +} \ No newline at end of file diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml new file mode 100644 index 0000000..eb64413 --- /dev/null +++ b/pnpm-lock.yaml @@ -0,0 +1,1028 @@ +lockfileVersion: '6.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +devDependencies: + '@types/node': + specifier: ^20.11.20 + version: 20.11.20 + vitepress: + specifier: 1.0.0-rc.36 + version: 1.0.0-rc.36(@algolia/client-search@4.22.1)(@types/node@20.11.20)(search-insights@2.13.0) + +packages: + + /@algolia/autocomplete-core@1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1)(search-insights@2.13.0): + resolution: {integrity: sha512-009HdfugtGCdC4JdXUbVJClA0q0zh24yyePn+KUGk3rP7j8FEe/m5Yo/z65gn6nP/cM39PxpzqKrL7A6fP6PPw==} + dependencies: + '@algolia/autocomplete-plugin-algolia-insights': 1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1)(search-insights@2.13.0) + '@algolia/autocomplete-shared': 1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1) +
transitivePeerDependencies: + - '@algolia/client-search' + - algoliasearch + - search-insights + dev: true + + /@algolia/autocomplete-plugin-algolia-insights@1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1)(search-insights@2.13.0): + resolution: {integrity: sha512-a/yTUkcO/Vyy+JffmAnTWbr4/90cLzw+CC3bRbhnULr/EM0fGNvM13oQQ14f2moLMcVDyAx/leczLlAOovhSZg==} + peerDependencies: + search-insights: '>= 1 < 3' + dependencies: + '@algolia/autocomplete-shared': 1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1) + search-insights: 2.13.0 + transitivePeerDependencies: + - '@algolia/client-search' + - algoliasearch + dev: true + + /@algolia/autocomplete-preset-algolia@1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1): + resolution: {integrity: sha512-d4qlt6YmrLMYy95n5TB52wtNDr6EgAIPH81dvvvW8UmuWRgxEtY0NJiPwl/h95JtG2vmRM804M0DSwMCNZlzRA==} + peerDependencies: + '@algolia/client-search': '>= 4.9.1 < 6' + algoliasearch: '>= 4.9.1 < 6' + dependencies: + '@algolia/autocomplete-shared': 1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1) + '@algolia/client-search': 4.22.1 + algoliasearch: 4.22.1 + dev: true + + /@algolia/autocomplete-shared@1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1): + resolution: {integrity: sha512-Wnm9E4Ye6Rl6sTTqjoymD+l8DjSTHsHboVRYrKgEt8Q7UHm9nYbqhN/i0fhUYA3OAEH7WA8x3jfpnmJm3rKvaQ==} + peerDependencies: + '@algolia/client-search': '>= 4.9.1 < 6' + algoliasearch: '>= 4.9.1 < 6' + dependencies: + '@algolia/client-search': 4.22.1 + algoliasearch: 4.22.1 + dev: true + + /@algolia/cache-browser-local-storage@4.22.1: + resolution: {integrity: sha512-Sw6IAmOCvvP6QNgY9j+Hv09mvkvEIDKjYW8ow0UDDAxSXy664RBNQk3i/0nt7gvceOJ6jGmOTimaZoY1THmU7g==} + dependencies: + '@algolia/cache-common': 4.22.1 + dev: true + + /@algolia/cache-common@4.22.1: + resolution: {integrity: sha512-TJMBKqZNKYB9TptRRjSUtevJeQVXRmg6rk9qgFKWvOy8jhCPdyNZV1nB3SKGufzvTVbomAukFR8guu/8NRKBTA==} + dev: true + + /@algolia/cache-in-memory@4.22.1: + resolution: 
{integrity: sha512-ve+6Ac2LhwpufuWavM/aHjLoNz/Z/sYSgNIXsinGofWOysPilQZPUetqLj8vbvi+DHZZaYSEP9H5SRVXnpsNNw==} + dependencies: + '@algolia/cache-common': 4.22.1 + dev: true + + /@algolia/client-account@4.22.1: + resolution: {integrity: sha512-k8m+oegM2zlns/TwZyi4YgCtyToackkOpE+xCaKCYfBfDtdGOaVZCM5YvGPtK+HGaJMIN/DoTL8asbM3NzHonw==} + dependencies: + '@algolia/client-common': 4.22.1 + '@algolia/client-search': 4.22.1 + '@algolia/transporter': 4.22.1 + dev: true + + /@algolia/client-analytics@4.22.1: + resolution: {integrity: sha512-1ssi9pyxyQNN4a7Ji9R50nSdISIumMFDwKNuwZipB6TkauJ8J7ha/uO60sPJFqQyqvvI+px7RSNRQT3Zrvzieg==} + dependencies: + '@algolia/client-common': 4.22.1 + '@algolia/client-search': 4.22.1 + '@algolia/requester-common': 4.22.1 + '@algolia/transporter': 4.22.1 + dev: true + + /@algolia/client-common@4.22.1: + resolution: {integrity: sha512-IvaL5v9mZtm4k4QHbBGDmU3wa/mKokmqNBqPj0K7lcR8ZDKzUorhcGp/u8PkPC/e0zoHSTvRh7TRkGX3Lm7iOQ==} + dependencies: + '@algolia/requester-common': 4.22.1 + '@algolia/transporter': 4.22.1 + dev: true + + /@algolia/client-personalization@4.22.1: + resolution: {integrity: sha512-sl+/klQJ93+4yaqZ7ezOttMQ/nczly/3GmgZXJ1xmoewP5jmdP/X/nV5U7EHHH3hCUEHeN7X1nsIhGPVt9E1cQ==} + dependencies: + '@algolia/client-common': 4.22.1 + '@algolia/requester-common': 4.22.1 + '@algolia/transporter': 4.22.1 + dev: true + + /@algolia/client-search@4.22.1: + resolution: {integrity: sha512-yb05NA4tNaOgx3+rOxAmFztgMTtGBi97X7PC3jyNeGiwkAjOZc2QrdZBYyIdcDLoI09N0gjtpClcackoTN0gPA==} + dependencies: + '@algolia/client-common': 4.22.1 + '@algolia/requester-common': 4.22.1 + '@algolia/transporter': 4.22.1 + dev: true + + /@algolia/logger-common@4.22.1: + resolution: {integrity: sha512-OnTFymd2odHSO39r4DSWRFETkBufnY2iGUZNrMXpIhF5cmFE8pGoINNPzwg02QLBlGSaLqdKy0bM8S0GyqPLBg==} + dev: true + + /@algolia/logger-console@4.22.1: + resolution: {integrity: sha512-O99rcqpVPKN1RlpgD6H3khUWylU24OXlzkavUAMy6QZd1776QAcauE3oP8CmD43nbaTjBexZj2nGsBH9Tc0FVA==} + dependencies: + 
'@algolia/logger-common': 4.22.1 + dev: true + + /@algolia/requester-browser-xhr@4.22.1: + resolution: {integrity: sha512-dtQGYIg6MteqT1Uay3J/0NDqD+UciHy3QgRbk7bNddOJu+p3hzjTRYESqEnoX/DpEkaNYdRHUKNylsqMpgwaEw==} + dependencies: + '@algolia/requester-common': 4.22.1 + dev: true + + /@algolia/requester-common@4.22.1: + resolution: {integrity: sha512-dgvhSAtg2MJnR+BxrIFqlLtkLlVVhas9HgYKMk2Uxiy5m6/8HZBL40JVAMb2LovoPFs9I/EWIoFVjOrFwzn5Qg==} + dev: true + + /@algolia/requester-node-http@4.22.1: + resolution: {integrity: sha512-JfmZ3MVFQkAU+zug8H3s8rZ6h0ahHZL/SpMaSasTCGYR5EEJsCc8SI5UZ6raPN2tjxa5bxS13BRpGSBUens7EA==} + dependencies: + '@algolia/requester-common': 4.22.1 + dev: true + + /@algolia/transporter@4.22.1: + resolution: {integrity: sha512-kzWgc2c9IdxMa3YqA6TN0NW5VrKYYW/BELIn7vnLyn+U/RFdZ4lxxt9/8yq3DKV5snvoDzzO4ClyejZRdV3lMQ==} + dependencies: + '@algolia/cache-common': 4.22.1 + '@algolia/logger-common': 4.22.1 + '@algolia/requester-common': 4.22.1 + dev: true + + /@babel/helper-string-parser@7.23.4: + resolution: {integrity: sha512-803gmbQdqwdf4olxrX4AJyFBV/RTr3rSmOj0rKwesmzlfhYNDEs+/iOcznzpNWlJlIlTJC2QfPFcHB6DlzdVLQ==} + engines: {node: '>=6.9.0'} + dev: true + + /@babel/helper-validator-identifier@7.22.20: + resolution: {integrity: sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==} + engines: {node: '>=6.9.0'} + dev: true + + /@babel/parser@7.23.6: + resolution: {integrity: sha512-Z2uID7YJ7oNvAI20O9X0bblw7Qqs8Q2hFy0R9tAfnfLkp5MW0UH9eUvnDSnFwKZ0AvgS1ucqR4KzvVHgnke1VQ==} + engines: {node: '>=6.0.0'} + hasBin: true + dependencies: + '@babel/types': 7.23.6 + dev: true + + /@babel/types@7.23.6: + resolution: {integrity: sha512-+uarb83brBzPKN38NX1MkB6vb6+mwvR6amUulqAE7ccQw1pEl+bCia9TbdG1lsnFP7lZySvUn37CHyXQdfTwzg==} + engines: {node: '>=6.9.0'} + dependencies: + '@babel/helper-string-parser': 7.23.4 + '@babel/helper-validator-identifier': 7.22.20 + to-fast-properties: 2.0.0 + dev: true + + /@docsearch/css@3.5.2: + 
resolution: {integrity: sha512-SPiDHaWKQZpwR2siD0KQUwlStvIAnEyK6tAE2h2Wuoq8ue9skzhlyVQ1ddzOxX6khULnAALDiR/isSF3bnuciA==} + dev: true + + /@docsearch/js@3.5.2(@algolia/client-search@4.22.1)(search-insights@2.13.0): + resolution: {integrity: sha512-p1YFTCDflk8ieHgFJYfmyHBki1D61+U9idwrLh+GQQMrBSP3DLGKpy0XUJtPjAOPltcVbqsTjiPFfH7JImjUNg==} + dependencies: + '@docsearch/react': 3.5.2(@algolia/client-search@4.22.1)(search-insights@2.13.0) + preact: 10.19.3 + transitivePeerDependencies: + - '@algolia/client-search' + - '@types/react' + - react + - react-dom + - search-insights + dev: true + + /@docsearch/react@3.5.2(@algolia/client-search@4.22.1)(search-insights@2.13.0): + resolution: {integrity: sha512-9Ahcrs5z2jq/DcAvYtvlqEBHImbm4YJI8M9y0x6Tqg598P40HTEkX7hsMcIuThI+hTFxRGZ9hll0Wygm2yEjng==} + peerDependencies: + '@types/react': '>= 16.8.0 < 19.0.0' + react: '>= 16.8.0 < 19.0.0' + react-dom: '>= 16.8.0 < 19.0.0' + search-insights: '>= 1 < 3' + peerDependenciesMeta: + '@types/react': + optional: true + react: + optional: true + react-dom: + optional: true + search-insights: + optional: true + dependencies: + '@algolia/autocomplete-core': 1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1)(search-insights@2.13.0) + '@algolia/autocomplete-preset-algolia': 1.9.3(@algolia/client-search@4.22.1)(algoliasearch@4.22.1) + '@docsearch/css': 3.5.2 + algoliasearch: 4.22.1 + search-insights: 2.13.0 + transitivePeerDependencies: + - '@algolia/client-search' + dev: true + + /@esbuild/aix-ppc64@0.19.11: + resolution: {integrity: sha512-FnzU0LyE3ySQk7UntJO4+qIiQgI7KoODnZg5xzXIrFJlKd2P2gwHsHY4927xj9y5PJmJSzULiUCWmv7iWnNa7g==} + engines: {node: '>=12'} + cpu: [ppc64] + os: [aix] + requiresBuild: true + dev: true + optional: true + + /@esbuild/android-arm64@0.19.11: + resolution: {integrity: sha512-aiu7K/5JnLj//KOnOfEZ0D90obUkRzDMyqd/wNAUQ34m4YUPVhRZpnqKV9uqDGxT7cToSDnIHsGooyIczu9T+Q==} + engines: {node: '>=12'} + cpu: [arm64] + os: [android] + requiresBuild: true + dev: true + 
optional: true + + /@esbuild/android-arm@0.19.11: + resolution: {integrity: sha512-5OVapq0ClabvKvQ58Bws8+wkLCV+Rxg7tUVbo9xu034Nm536QTII4YzhaFriQ7rMrorfnFKUsArD2lqKbFY4vw==} + engines: {node: '>=12'} + cpu: [arm] + os: [android] + requiresBuild: true + dev: true + optional: true + + /@esbuild/android-x64@0.19.11: + resolution: {integrity: sha512-eccxjlfGw43WYoY9QgB82SgGgDbibcqyDTlk3l3C0jOVHKxrjdc9CTwDUQd0vkvYg5um0OH+GpxYvp39r+IPOg==} + engines: {node: '>=12'} + cpu: [x64] + os: [android] + requiresBuild: true + dev: true + optional: true + + /@esbuild/darwin-arm64@0.19.11: + resolution: {integrity: sha512-ETp87DRWuSt9KdDVkqSoKoLFHYTrkyz2+65fj9nfXsaV3bMhTCjtQfw3y+um88vGRKRiF7erPrh/ZuIdLUIVxQ==} + engines: {node: '>=12'} + cpu: [arm64] + os: [darwin] + requiresBuild: true + dev: true + optional: true + + /@esbuild/darwin-x64@0.19.11: + resolution: {integrity: sha512-fkFUiS6IUK9WYUO/+22omwetaSNl5/A8giXvQlcinLIjVkxwTLSktbF5f/kJMftM2MJp9+fXqZ5ezS7+SALp4g==} + engines: {node: '>=12'} + cpu: [x64] + os: [darwin] + requiresBuild: true + dev: true + optional: true + + /@esbuild/freebsd-arm64@0.19.11: + resolution: {integrity: sha512-lhoSp5K6bxKRNdXUtHoNc5HhbXVCS8V0iZmDvyWvYq9S5WSfTIHU2UGjcGt7UeS6iEYp9eeymIl5mJBn0yiuxA==} + engines: {node: '>=12'} + cpu: [arm64] + os: [freebsd] + requiresBuild: true + dev: true + optional: true + + /@esbuild/freebsd-x64@0.19.11: + resolution: {integrity: sha512-JkUqn44AffGXitVI6/AbQdoYAq0TEullFdqcMY/PCUZ36xJ9ZJRtQabzMA+Vi7r78+25ZIBosLTOKnUXBSi1Kw==} + engines: {node: '>=12'} + cpu: [x64] + os: [freebsd] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-arm64@0.19.11: + resolution: {integrity: sha512-LneLg3ypEeveBSMuoa0kwMpCGmpu8XQUh+mL8XXwoYZ6Be2qBnVtcDI5azSvh7vioMDhoJFZzp9GWp9IWpYoUg==} + engines: {node: '>=12'} + cpu: [arm64] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-arm@0.19.11: + resolution: {integrity: 
sha512-3CRkr9+vCV2XJbjwgzjPtO8T0SZUmRZla+UL1jw+XqHZPkPgZiyWvbDvl9rqAN8Zl7qJF0O/9ycMtjU67HN9/Q==} + engines: {node: '>=12'} + cpu: [arm] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-ia32@0.19.11: + resolution: {integrity: sha512-caHy++CsD8Bgq2V5CodbJjFPEiDPq8JJmBdeyZ8GWVQMjRD0sU548nNdwPNvKjVpamYYVL40AORekgfIubwHoA==} + engines: {node: '>=12'} + cpu: [ia32] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-loong64@0.19.11: + resolution: {integrity: sha512-ppZSSLVpPrwHccvC6nQVZaSHlFsvCQyjnvirnVjbKSHuE5N24Yl8F3UwYUUR1UEPaFObGD2tSvVKbvR+uT1Nrg==} + engines: {node: '>=12'} + cpu: [loong64] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-mips64el@0.19.11: + resolution: {integrity: sha512-B5x9j0OgjG+v1dF2DkH34lr+7Gmv0kzX6/V0afF41FkPMMqaQ77pH7CrhWeR22aEeHKaeZVtZ6yFwlxOKPVFyg==} + engines: {node: '>=12'} + cpu: [mips64el] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-ppc64@0.19.11: + resolution: {integrity: sha512-MHrZYLeCG8vXblMetWyttkdVRjQlQUb/oMgBNurVEnhj4YWOr4G5lmBfZjHYQHHN0g6yDmCAQRR8MUHldvvRDA==} + engines: {node: '>=12'} + cpu: [ppc64] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-riscv64@0.19.11: + resolution: {integrity: sha512-f3DY++t94uVg141dozDu4CCUkYW+09rWtaWfnb3bqe4w5NqmZd6nPVBm+qbz7WaHZCoqXqHz5p6CM6qv3qnSSQ==} + engines: {node: '>=12'} + cpu: [riscv64] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-s390x@0.19.11: + resolution: {integrity: sha512-A5xdUoyWJHMMlcSMcPGVLzYzpcY8QP1RtYzX5/bS4dvjBGVxdhuiYyFwp7z74ocV7WDc0n1harxmpq2ePOjI0Q==} + engines: {node: '>=12'} + cpu: [s390x] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/linux-x64@0.19.11: + resolution: {integrity: sha512-grbyMlVCvJSfxFQUndw5mCtWs5LO1gUlwP4CDi4iJBbVpZcqLVT29FxgGuBJGSzyOxotFG4LoO5X+M1350zmPA==} + engines: {node: '>=12'} + cpu: [x64] + os: 
[linux] + requiresBuild: true + dev: true + optional: true + + /@esbuild/netbsd-x64@0.19.11: + resolution: {integrity: sha512-13jvrQZJc3P230OhU8xgwUnDeuC/9egsjTkXN49b3GcS5BKvJqZn86aGM8W9pd14Kd+u7HuFBMVtrNGhh6fHEQ==} + engines: {node: '>=12'} + cpu: [x64] + os: [netbsd] + requiresBuild: true + dev: true + optional: true + + /@esbuild/openbsd-x64@0.19.11: + resolution: {integrity: sha512-ysyOGZuTp6SNKPE11INDUeFVVQFrhcNDVUgSQVDzqsqX38DjhPEPATpid04LCoUr2WXhQTEZ8ct/EgJCUDpyNw==} + engines: {node: '>=12'} + cpu: [x64] + os: [openbsd] + requiresBuild: true + dev: true + optional: true + + /@esbuild/sunos-x64@0.19.11: + resolution: {integrity: sha512-Hf+Sad9nVwvtxy4DXCZQqLpgmRTQqyFyhT3bZ4F2XlJCjxGmRFF0Shwn9rzhOYRB61w9VMXUkxlBy56dk9JJiQ==} + engines: {node: '>=12'} + cpu: [x64] + os: [sunos] + requiresBuild: true + dev: true + optional: true + + /@esbuild/win32-arm64@0.19.11: + resolution: {integrity: sha512-0P58Sbi0LctOMOQbpEOvOL44Ne0sqbS0XWHMvvrg6NE5jQ1xguCSSw9jQeUk2lfrXYsKDdOe6K+oZiwKPilYPQ==} + engines: {node: '>=12'} + cpu: [arm64] + os: [win32] + requiresBuild: true + dev: true + optional: true + + /@esbuild/win32-ia32@0.19.11: + resolution: {integrity: sha512-6YOrWS+sDJDmshdBIQU+Uoyh7pQKrdykdefC1avn76ss5c+RN6gut3LZA4E2cH5xUEp5/cA0+YxRaVtRAb0xBg==} + engines: {node: '>=12'} + cpu: [ia32] + os: [win32] + requiresBuild: true + dev: true + optional: true + + /@esbuild/win32-x64@0.19.11: + resolution: {integrity: sha512-vfkhltrjCAb603XaFhqhAF4LGDi2M4OrCRrFusyQ+iTLQ/o60QQXxc9cZC/FFpihBI9N1Grn6SMKVJ4KP7Fuiw==} + engines: {node: '>=12'} + cpu: [x64] + os: [win32] + requiresBuild: true + dev: true + optional: true + + /@jridgewell/sourcemap-codec@1.4.15: + resolution: {integrity: sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==} + dev: true + + /@rollup/rollup-android-arm-eabi@4.9.4: + resolution: {integrity: sha512-ub/SN3yWqIv5CWiAZPHVS1DloyZsJbtXmX4HxUTIpS0BHm9pW5iYBo2mIZi+hE3AeiTzHz33blwSnhdUo+9NpA==} + cpu: [arm] + os: 
[android] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-android-arm64@4.9.4: + resolution: {integrity: sha512-ehcBrOR5XTl0W0t2WxfTyHCR/3Cq2jfb+I4W+Ch8Y9b5G+vbAecVv0Fx/J1QKktOrgUYsIKxWAKgIpvw56IFNA==} + cpu: [arm64] + os: [android] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-darwin-arm64@4.9.4: + resolution: {integrity: sha512-1fzh1lWExwSTWy8vJPnNbNM02WZDS8AW3McEOb7wW+nPChLKf3WG2aG7fhaUmfX5FKw9zhsF5+MBwArGyNM7NA==} + cpu: [arm64] + os: [darwin] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-darwin-x64@4.9.4: + resolution: {integrity: sha512-Gc6cukkF38RcYQ6uPdiXi70JB0f29CwcQ7+r4QpfNpQFVHXRd0DfWFidoGxjSx1DwOETM97JPz1RXL5ISSB0pA==} + cpu: [x64] + os: [darwin] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-linux-arm-gnueabihf@4.9.4: + resolution: {integrity: sha512-g21RTeFzoTl8GxosHbnQZ0/JkuFIB13C3T7Y0HtKzOXmoHhewLbVTFBQZu+z5m9STH6FZ7L/oPgU4Nm5ErN2fw==} + cpu: [arm] + os: [linux] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-linux-arm64-gnu@4.9.4: + resolution: {integrity: sha512-TVYVWD/SYwWzGGnbfTkrNpdE4HON46orgMNHCivlXmlsSGQOx/OHHYiQcMIOx38/GWgwr/po2LBn7wypkWw/Mg==} + cpu: [arm64] + os: [linux] + libc: [glibc] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-linux-arm64-musl@4.9.4: + resolution: {integrity: sha512-XcKvuendwizYYhFxpvQ3xVpzje2HHImzg33wL9zvxtj77HvPStbSGI9czrdbfrf8DGMcNNReH9pVZv8qejAQ5A==} + cpu: [arm64] + os: [linux] + libc: [musl] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-linux-riscv64-gnu@4.9.4: + resolution: {integrity: sha512-LFHS/8Q+I9YA0yVETyjonMJ3UA+DczeBd/MqNEzsGSTdNvSJa1OJZcSH8GiXLvcizgp9AlHs2walqRcqzjOi3A==} + cpu: [riscv64] + os: [linux] + libc: [glibc] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-linux-x64-gnu@4.9.4: + resolution: {integrity: sha512-dIYgo+j1+yfy81i0YVU5KnQrIJZE8ERomx17ReU4GREjGtDW4X+nvkBak2xAUpyqLs4eleDSj3RrV72fQos7zw==} + cpu: 
[x64] + os: [linux] + libc: [glibc] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-linux-x64-musl@4.9.4: + resolution: {integrity: sha512-RoaYxjdHQ5TPjaPrLsfKqR3pakMr3JGqZ+jZM0zP2IkDtsGa4CqYaWSfQmZVgFUCgLrTnzX+cnHS3nfl+kB6ZQ==} + cpu: [x64] + os: [linux] + libc: [musl] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-win32-arm64-msvc@4.9.4: + resolution: {integrity: sha512-T8Q3XHV+Jjf5e49B4EAaLKV74BbX7/qYBRQ8Wop/+TyyU0k+vSjiLVSHNWdVd1goMjZcbhDmYZUYW5RFqkBNHQ==} + cpu: [arm64] + os: [win32] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-win32-ia32-msvc@4.9.4: + resolution: {integrity: sha512-z+JQ7JirDUHAsMecVydnBPWLwJjbppU+7LZjffGf+Jvrxq+dVjIE7By163Sc9DKc3ADSU50qPVw0KonBS+a+HQ==} + cpu: [ia32] + os: [win32] + requiresBuild: true + dev: true + optional: true + + /@rollup/rollup-win32-x64-msvc@4.9.4: + resolution: {integrity: sha512-LfdGXCV9rdEify1oxlN9eamvDSjv9md9ZVMAbNHA87xqIfFCxImxan9qZ8+Un54iK2nnqPlbnSi4R54ONtbWBw==} + cpu: [x64] + os: [win32] + requiresBuild: true + dev: true + optional: true + + /@types/estree@1.0.5: + resolution: {integrity: sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==} + dev: true + + /@types/linkify-it@3.0.5: + resolution: {integrity: sha512-yg6E+u0/+Zjva+buc3EIb+29XEg4wltq7cSmd4Uc2EE/1nUVmxyzpX6gUXD0V8jIrG0r7YeOGVIbYRkxeooCtw==} + dev: true + + /@types/markdown-it@13.0.7: + resolution: {integrity: sha512-U/CBi2YUUcTHBt5tjO2r5QV/x0Po6nsYwQU4Y04fBS6vfoImaiZ6f8bi3CjTCxBPQSO1LMyUqkByzi8AidyxfA==} + dependencies: + '@types/linkify-it': 3.0.5 + '@types/mdurl': 1.0.5 + dev: true + + /@types/mdurl@1.0.5: + resolution: {integrity: sha512-6L6VymKTzYSrEf4Nev4Xa1LCHKrlTlYCBMTlQKFuddo1CvQcE52I0mwfOJayueUC7MJuXOeHTcIU683lzd0cUA==} + dev: true + + /@types/node@20.11.20: + resolution: {integrity: sha512-7/rR21OS+fq8IyHTgtLkDK949uzsa6n8BkziAKtPVpugIkO6D+/ooXMvzXxDnZrmtXVfjb1bKQafYpb8s89LOg==} + dependencies: + undici-types: 5.26.5 + dev: 
true + + /@types/web-bluetooth@0.0.20: + resolution: {integrity: sha512-g9gZnnXVq7gM7v3tJCWV/qw7w+KeOlSHAhgF9RytFyifW6AF61hdT2ucrYhPq9hLs5JIryeupHV3qGk95dH9ow==} + dev: true + + /@vitejs/plugin-vue@5.0.3(vite@5.0.12)(vue@3.4.10): + resolution: {integrity: sha512-b8S5dVS40rgHdDrw+DQi/xOM9ed+kSRZzfm1T74bMmBDCd8XO87NKlFYInzCtwvtWwXZvo1QxE2OSspTATWrbA==} + engines: {node: ^18.0.0 || >=20.0.0} + peerDependencies: + vite: ^5.0.0 + vue: ^3.2.25 + dependencies: + vite: 5.0.12(@types/node@20.11.20) + vue: 3.4.10 + dev: true + + /@vue/compiler-core@3.4.10: + resolution: {integrity: sha512-53vxh7K9qbx+JILnGEhrFRyr7H7e4NdT8RuTNU3m6HhJKFvcAqFTNXpYMHnyuAzzRGdsbsYHBgQC3H6xEXTG6w==} + dependencies: + '@babel/parser': 7.23.6 + '@vue/shared': 3.4.10 + entities: 4.5.0 + estree-walker: 2.0.2 + source-map-js: 1.0.2 + dev: true + + /@vue/compiler-dom@3.4.10: + resolution: {integrity: sha512-QAALBJksIFpXGYuo74rtMgnwpVZDvd3kYbUa4gYX9s/5QiqEvZSgbKtOdUGydXcxKPt3ifC+0/bhPVHXN2694A==} + dependencies: + '@vue/compiler-core': 3.4.10 + '@vue/shared': 3.4.10 + dev: true + + /@vue/compiler-sfc@3.4.10: + resolution: {integrity: sha512-sTOssaQySgrMjrhZxmAqdp6n+E51VteIVIDaOR537H2P63DyzMmig21U0XXFxiXmMIfrK91lAInnc+bIAYemGw==} + dependencies: + '@babel/parser': 7.23.6 + '@vue/compiler-core': 3.4.10 + '@vue/compiler-dom': 3.4.10 + '@vue/compiler-ssr': 3.4.10 + '@vue/shared': 3.4.10 + estree-walker: 2.0.2 + magic-string: 0.30.5 + postcss: 8.4.33 + source-map-js: 1.0.2 + dev: true + + /@vue/compiler-ssr@3.4.10: + resolution: {integrity: sha512-Y90TL1abretWbUiK5rv+9smS1thCHE5sSuhZgiLh6cxgZ2Pcy3BEvDd3reID0iwNcTdMbTeE6NI3Aq4Mux6hqQ==} + dependencies: + '@vue/compiler-dom': 3.4.10 + '@vue/shared': 3.4.10 + dev: true + + /@vue/devtools-api@6.5.1: + resolution: {integrity: sha512-+KpckaAQyfbvshdDW5xQylLni1asvNSGme1JFs8I1+/H5pHEhqUKMEQD/qn3Nx5+/nycBq11qAEi8lk+LXI2dA==} + dev: true + + /@vue/reactivity@3.4.10: + resolution: {integrity: 
sha512-SmGGpo37LzPcAFTopHNIJRNVOQfma9YgyPkAzx9/TJ01lbCCYigS28hEcY1hjiJ1PRK8iVX62Ov5yzmUgYH/pQ==} + dependencies: + '@vue/shared': 3.4.10 + dev: true + + /@vue/runtime-core@3.4.10: + resolution: {integrity: sha512-Ri2Cz9sFr66AEUewGUK8IXhIUAhshTHVUGuJR8pqMbtjIds+zPa8QPO5UZImGMQ8HTY7eEpKwztCct9V3+Iqug==} + dependencies: + '@vue/reactivity': 3.4.10 + '@vue/shared': 3.4.10 + dev: true + + /@vue/runtime-dom@3.4.10: + resolution: {integrity: sha512-ROsdi5M2niRDmjXJNZ8KKiGwXyG1FO8l9n6sCN0kaJEHbjWkuigu96YAI3fK/AWUZPSXXEcMEBVPC6rL3mmUuA==} + dependencies: + '@vue/runtime-core': 3.4.10 + '@vue/shared': 3.4.10 + csstype: 3.1.3 + dev: true + + /@vue/server-renderer@3.4.10(vue@3.4.10): + resolution: {integrity: sha512-WpCBAhesLq44JKWfdFqb+Bi4ACUW0d8x1z90GnE0spccsAlEDMXV5nm+pwXLyW0OdP2iPrO/n/QMJh4B1v9Ciw==} + peerDependencies: + vue: 3.4.10 + dependencies: + '@vue/compiler-ssr': 3.4.10 + '@vue/shared': 3.4.10 + vue: 3.4.10 + dev: true + + /@vue/shared@3.4.10: + resolution: {integrity: sha512-C0mIVhwW1xQLMFyqMJxnhq6fWyE02lCgcE+TDdtGpg6B3H6kh/0YcqS54qYc76UJNlWegf3VgsLqgk6D9hBmzQ==} + dev: true + + /@vueuse/core@10.7.1(vue@3.4.10): + resolution: {integrity: sha512-74mWHlaesJSWGp1ihg76vAnfVq9NTv1YT0SYhAQ6zwFNdBkkP+CKKJmVOEHcdSnLXCXYiL5e7MaewblfiYLP7g==} + dependencies: + '@types/web-bluetooth': 0.0.20 + '@vueuse/metadata': 10.7.1 + '@vueuse/shared': 10.7.1(vue@3.4.10) + vue-demi: 0.14.6(vue@3.4.10) + transitivePeerDependencies: + - '@vue/composition-api' + - vue + dev: true + + /@vueuse/integrations@10.7.1(focus-trap@7.5.4)(vue@3.4.10): + resolution: {integrity: sha512-cKo5LEeKVHdBRBtMTOrDPdR0YNtrmN9IBfdcnY2P3m5LHVrsD0xiHUtAH1WKjHQRIErZG6rJUa6GA4tWZt89Og==} + peerDependencies: + async-validator: '*' + axios: '*' + change-case: '*' + drauu: '*' + focus-trap: '*' + fuse.js: '*' + idb-keyval: '*' + jwt-decode: '*' + nprogress: '*' + qrcode: '*' + sortablejs: '*' + universal-cookie: '*' + peerDependenciesMeta: + async-validator: + optional: true + axios: + optional: true + change-case: + 
optional: true + drauu: + optional: true + focus-trap: + optional: true + fuse.js: + optional: true + idb-keyval: + optional: true + jwt-decode: + optional: true + nprogress: + optional: true + qrcode: + optional: true + sortablejs: + optional: true + universal-cookie: + optional: true + dependencies: + '@vueuse/core': 10.7.1(vue@3.4.10) + '@vueuse/shared': 10.7.1(vue@3.4.10) + focus-trap: 7.5.4 + vue-demi: 0.14.6(vue@3.4.10) + transitivePeerDependencies: + - '@vue/composition-api' + - vue + dev: true + + /@vueuse/metadata@10.7.1: + resolution: {integrity: sha512-jX8MbX5UX067DYVsbtrmKn6eG6KMcXxLRLlurGkZku5ZYT3vxgBjui2zajvUZ18QLIjrgBkFRsu7CqTAg18QFw==} + dev: true + + /@vueuse/shared@10.7.1(vue@3.4.10): + resolution: {integrity: sha512-v0jbRR31LSgRY/C5i5X279A/WQjD6/JsMzGa+eqt658oJ75IvQXAeONmwvEMrvJQKnRElq/frzBR7fhmWY5uLw==} + dependencies: + vue-demi: 0.14.6(vue@3.4.10) + transitivePeerDependencies: + - '@vue/composition-api' + - vue + dev: true + + /algoliasearch@4.22.1: + resolution: {integrity: sha512-jwydKFQJKIx9kIZ8Jm44SdpigFwRGPESaxZBaHSV0XWN2yBJAOT4mT7ppvlrpA4UGzz92pqFnVKr/kaZXrcreg==} + dependencies: + '@algolia/cache-browser-local-storage': 4.22.1 + '@algolia/cache-common': 4.22.1 + '@algolia/cache-in-memory': 4.22.1 + '@algolia/client-account': 4.22.1 + '@algolia/client-analytics': 4.22.1 + '@algolia/client-common': 4.22.1 + '@algolia/client-personalization': 4.22.1 + '@algolia/client-search': 4.22.1 + '@algolia/logger-common': 4.22.1 + '@algolia/logger-console': 4.22.1 + '@algolia/requester-browser-xhr': 4.22.1 + '@algolia/requester-common': 4.22.1 + '@algolia/requester-node-http': 4.22.1 + '@algolia/transporter': 4.22.1 + dev: true + + /csstype@3.1.3: + resolution: {integrity: sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==} + dev: true + + /entities@4.5.0: + resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} + engines: {node: '>=0.12'} + 
dev: true + + /esbuild@0.19.11: + resolution: {integrity: sha512-HJ96Hev2hX/6i5cDVwcqiJBBtuo9+FeIJOtZ9W1kA5M6AMJRHUZlpYZ1/SbEwtO0ioNAW8rUooVpC/WehY2SfA==} + engines: {node: '>=12'} + hasBin: true + requiresBuild: true + optionalDependencies: + '@esbuild/aix-ppc64': 0.19.11 + '@esbuild/android-arm': 0.19.11 + '@esbuild/android-arm64': 0.19.11 + '@esbuild/android-x64': 0.19.11 + '@esbuild/darwin-arm64': 0.19.11 + '@esbuild/darwin-x64': 0.19.11 + '@esbuild/freebsd-arm64': 0.19.11 + '@esbuild/freebsd-x64': 0.19.11 + '@esbuild/linux-arm': 0.19.11 + '@esbuild/linux-arm64': 0.19.11 + '@esbuild/linux-ia32': 0.19.11 + '@esbuild/linux-loong64': 0.19.11 + '@esbuild/linux-mips64el': 0.19.11 + '@esbuild/linux-ppc64': 0.19.11 + '@esbuild/linux-riscv64': 0.19.11 + '@esbuild/linux-s390x': 0.19.11 + '@esbuild/linux-x64': 0.19.11 + '@esbuild/netbsd-x64': 0.19.11 + '@esbuild/openbsd-x64': 0.19.11 + '@esbuild/sunos-x64': 0.19.11 + '@esbuild/win32-arm64': 0.19.11 + '@esbuild/win32-ia32': 0.19.11 + '@esbuild/win32-x64': 0.19.11 + dev: true + + /estree-walker@2.0.2: + resolution: {integrity: sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==} + dev: true + + /focus-trap@7.5.4: + resolution: {integrity: sha512-N7kHdlgsO/v+iD/dMoJKtsSqs5Dz/dXZVebRgJw23LDk+jMi/974zyiOYDziY2JPp8xivq9BmUGwIJMiuSBi7w==} + dependencies: + tabbable: 6.2.0 + dev: true + + /fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + requiresBuild: true + dev: true + optional: true + + /magic-string@0.30.5: + resolution: {integrity: sha512-7xlpfBaQaP/T6Vh8MO/EqXSW5En6INHEvEXQiuff7Gku0PWjU3uf6w/j9o7O+SpB5fOAkrI5HeoNgwjEO0pFsA==} + engines: {node: '>=12'} + dependencies: + '@jridgewell/sourcemap-codec': 1.4.15 + dev: true + + /mark.js@8.11.1: + resolution: {integrity: 
sha512-1I+1qpDt4idfgLQG+BNWmrqku+7/2bi5nLf4YwF8y8zXvmfiTBY3PV3ZibfrjBueCByROpuBjLLFCajqkgYoLQ==} + dev: true + + /minisearch@6.3.0: + resolution: {integrity: sha512-ihFnidEeU8iXzcVHy74dhkxh/dn8Dc08ERl0xwoMMGqp4+LvRSCgicb+zGqWthVokQKvCSxITlh3P08OzdTYCQ==} + dev: true + + /nanoid@3.3.7: + resolution: {integrity: sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==} + engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + hasBin: true + dev: true + + /picocolors@1.0.0: + resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==} + dev: true + + /postcss@8.4.33: + resolution: {integrity: sha512-Kkpbhhdjw2qQs2O2DGX+8m5OVqEcbB9HRBvuYM9pgrjEFUg30A9LmXNlTAUj4S9kgtGyrMbTzVjH7E+s5Re2yg==} + engines: {node: ^10 || ^12 || >=14} + dependencies: + nanoid: 3.3.7 + picocolors: 1.0.0 + source-map-js: 1.0.2 + dev: true + + /preact@10.19.3: + resolution: {integrity: sha512-nHHTeFVBTHRGxJXKkKu5hT8C/YWBkPso4/Gad6xuj5dbptt9iF9NZr9pHbPhBrnT2klheu7mHTxTZ/LjwJiEiQ==} + dev: true + + /rollup@4.9.4: + resolution: {integrity: sha512-2ztU7pY/lrQyXSCnnoU4ICjT/tCG9cdH3/G25ERqE3Lst6vl2BCM5hL2Nw+sslAvAf+ccKsAq1SkKQALyqhR7g==} + engines: {node: '>=18.0.0', npm: '>=8.0.0'} + hasBin: true + dependencies: + '@types/estree': 1.0.5 + optionalDependencies: + '@rollup/rollup-android-arm-eabi': 4.9.4 + '@rollup/rollup-android-arm64': 4.9.4 + '@rollup/rollup-darwin-arm64': 4.9.4 + '@rollup/rollup-darwin-x64': 4.9.4 + '@rollup/rollup-linux-arm-gnueabihf': 4.9.4 + '@rollup/rollup-linux-arm64-gnu': 4.9.4 + '@rollup/rollup-linux-arm64-musl': 4.9.4 + '@rollup/rollup-linux-riscv64-gnu': 4.9.4 + '@rollup/rollup-linux-x64-gnu': 4.9.4 + '@rollup/rollup-linux-x64-musl': 4.9.4 + '@rollup/rollup-win32-arm64-msvc': 4.9.4 + '@rollup/rollup-win32-ia32-msvc': 4.9.4 + '@rollup/rollup-win32-x64-msvc': 4.9.4 + fsevents: 2.3.3 + dev: true + + /search-insights@2.13.0: + resolution: {integrity: 
sha512-Orrsjf9trHHxFRuo9/rzm0KIWmgzE8RMlZMzuhZOJ01Rnz3D0YBAe+V6473t6/H6c7irs6Lt48brULAiRWb3Vw==} + dev: true + + /shikiji-core@0.9.18: + resolution: {integrity: sha512-PKTXptbrp/WEDjNHV8OFG9KkfhmR0pSd161kzlDDlgQ0HXAnqJYNDSjqsy1CYZMx5bSvLMy42yJj9oFTqmkNTQ==} + dev: true + + /shikiji-transformers@0.9.18: + resolution: {integrity: sha512-lvKVfgx1ETDqUNxqiUn+whlnjQiunsAg76DOpzjjxkHE/bLcwa+jrghcMxQhui86SLR1tzCdM4Imh+RxW0LI2Q==} + dependencies: + shikiji: 0.9.18 + dev: true + + /shikiji@0.9.18: + resolution: {integrity: sha512-/tFMIdV7UQklzN13VjF0/XFzmii6C606Jc878hNezvB8ZR8FG8FW9j0I4J9EJre0owlnPntgLVPpHqy27Gs+DQ==} + dependencies: + shikiji-core: 0.9.18 + dev: true + + /source-map-js@1.0.2: + resolution: {integrity: sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==} + engines: {node: '>=0.10.0'} + dev: true + + /tabbable@6.2.0: + resolution: {integrity: sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew==} + dev: true + + /to-fast-properties@2.0.0: + resolution: {integrity: sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==} + engines: {node: '>=4'} + dev: true + + /undici-types@5.26.5: + resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} + dev: true + + /vite@5.0.12(@types/node@20.11.20): + resolution: {integrity: sha512-4hsnEkG3q0N4Tzf1+t6NdN9dg/L3BM+q8SWgbSPnJvrgH2kgdyzfVJwbR1ic69/4uMJJ/3dqDZZE5/WwqW8U1w==} + engines: {node: ^18.0.0 || >=20.0.0} + hasBin: true + peerDependencies: + '@types/node': ^18.0.0 || >=20.0.0 + less: '*' + lightningcss: ^1.21.0 + sass: '*' + stylus: '*' + sugarss: '*' + terser: ^5.4.0 + peerDependenciesMeta: + '@types/node': + optional: true + less: + optional: true + lightningcss: + optional: true + sass: + optional: true + stylus: + optional: true + sugarss: + optional: true + terser: + optional: true + dependencies: + '@types/node': 20.11.20 
+ esbuild: 0.19.11 + postcss: 8.4.33 + rollup: 4.9.4 + optionalDependencies: + fsevents: 2.3.3 + dev: true + + /vitepress@1.0.0-rc.36(@algolia/client-search@4.22.1)(@types/node@20.11.20)(search-insights@2.13.0): + resolution: {integrity: sha512-2z4dpM9PplN/yvTifhavOIAazlCR6OJ5PvLoRbc+7LdcFeIlCsuDGENLX4HjMW18jQZF5/j7++PNqdBfeazxUA==} + hasBin: true + peerDependencies: + markdown-it-mathjax3: ^4.3.2 + postcss: ^8.4.33 + peerDependenciesMeta: + markdown-it-mathjax3: + optional: true + postcss: + optional: true + dependencies: + '@docsearch/css': 3.5.2 + '@docsearch/js': 3.5.2(@algolia/client-search@4.22.1)(search-insights@2.13.0) + '@types/markdown-it': 13.0.7 + '@vitejs/plugin-vue': 5.0.3(vite@5.0.12)(vue@3.4.10) + '@vue/devtools-api': 6.5.1 + '@vueuse/core': 10.7.1(vue@3.4.10) + '@vueuse/integrations': 10.7.1(focus-trap@7.5.4)(vue@3.4.10) + focus-trap: 7.5.4 + mark.js: 8.11.1 + minisearch: 6.3.0 + shikiji: 0.9.18 + shikiji-core: 0.9.18 + shikiji-transformers: 0.9.18 + vite: 5.0.12(@types/node@20.11.20) + vue: 3.4.10 + transitivePeerDependencies: + - '@algolia/client-search' + - '@types/node' + - '@types/react' + - '@vue/composition-api' + - async-validator + - axios + - change-case + - drauu + - fuse.js + - idb-keyval + - jwt-decode + - less + - lightningcss + - nprogress + - qrcode + - react + - react-dom + - sass + - search-insights + - sortablejs + - stylus + - sugarss + - terser + - typescript + - universal-cookie + dev: true + + /vue-demi@0.14.6(vue@3.4.10): + resolution: {integrity: sha512-8QA7wrYSHKaYgUxDA5ZC24w+eHm3sYCbp0EzcDwKqN3p6HqtTCGR/GVsPyZW92unff4UlcSh++lmqDWN3ZIq4w==} + engines: {node: '>=12'} + hasBin: true + requiresBuild: true + peerDependencies: + '@vue/composition-api': ^1.0.0-rc.1 + vue: ^3.0.0-0 || ^2.6.0 + peerDependenciesMeta: + '@vue/composition-api': + optional: true + dependencies: + vue: 3.4.10 + dev: true + + /vue@3.4.10: + resolution: {integrity: 
sha512-c+O8qGqdWPF9joTCzMGeDDedViooh6c8RY3+eW5+6GCAIY8YjChmU06LsUu0PnMZbIk1oKUoJTqKzmghYtFypw==} + peerDependencies: + typescript: '*' + peerDependenciesMeta: + typescript: + optional: true + dependencies: + '@vue/compiler-dom': 3.4.10 + '@vue/compiler-sfc': 3.4.10 + '@vue/runtime-dom': 3.4.10 + '@vue/server-renderer': 3.4.10(vue@3.4.10) + '@vue/shared': 3.4.10 + dev: true diff --git a/preface.md b/preface.md new file mode 100644 index 0000000..62dc77a --- /dev/null +++ b/preface.md @@ -0,0 +1,37 @@ +--- +outline: deep +--- + +# 卷首语 + +今年的卷首语,我花了很大的“毅力”,才抑制了自己用 ChatGPT 帮助撰写的冲动,打算还是完全靠自己手写。这其实也是整个 2023 年的趋势之一:越来越多的工作,人们都开始尝试寻求 AI 的帮助。 + +### AI & AIGC + +除了已经不再开源的 OpenAI,在 2023 年诞生了无数热门的、开源的大模型;无数热门的、开源的基于 GPT 的应用;还有 AutoGPT、LangChain、CoT、RAG 这样的新名词和新项目层出不穷。在图像生成、语音生成、代码生成等一系列 AIGC 的领域,都给人一种一日千里,恍如隔世的感觉。 + +从 2020 年开始的每一年的开源年报,我们都会谈到“剧变”,如果 2020~2022 年的变化,是以世界风云变化为主的话,到了 2023 年,就真的已经在 IT 技术领域,在开源生态领域,掀起滔天巨浪了。 + +### 关于全域数据 + +今年的中国开源年度报告,本身也有一件大事发生。就是我们首次能够将 GitHub 的数据与 Gitee 的数据放在一起,进行通盘的比较与洞察,其中的一些发现可能会打破很多人对于中国开源活跃度与贡献度的“偏见”。而这样的数据扩展,我们今后还将不断延伸,使得我们的数据源,真正不负“全域”之名。 + +### 开源社区如何应对那些最棘手的挑战? + +2023 年 7 月,瑞士日内瓦成为 Linux 基金会举办的 Open Source Congress 的会议地点。这次聚会讨论了开源社区面临的紧迫问题,如网络安全、技术民族主义(techno-nationalism)的崛起、人工智能的复杂性以及日益增加的监管审查的挑战。本次会议邀请了 73 家开源组织参加,其中开源社与开放原子开源基金会,也受邀派代表,参加了日内瓦的会议。 + +作为一次野心勃勃的尝试,第一届开源“议会”还只是一个开始,未来还需要全世界范围内的开源人,更好的携手应对挑战。 + +### 中国这一年的开源发展究竟如何? + +除开疫情结束之后的报复性反弹,也不用过多引述接下来的报告中会出现的各种统计数据,只谈个人的直观感受,那就是:外热内温。一方面外部的各种国家政策、地方政策、技术会议、社区交流,的确都非常热闹;而另一方面,在开源社区内部,尤其是开发活跃度方面,其实已经“由热转温”,增长放缓了。而且,这还不仅仅是中国的开源发展,而是全球开源发展的大趋势。 + +除了直观感受,还应该有客观评价。我们既不必洋洋得意,也不必闷闷不乐,更不该妄自菲薄。对于中国开源,“捧杀”与“棒杀”,都是过犹不及的做法。 + +回首 2023,展望 2024,我们应该期待一些什么?又应该投身于哪些方向?在一个变化太快,以至于人人都感到焦虑的时代,哪些才是我们能够把握的呢?让我们一起来探索吧! + +
+庄表伟 开源社理事暨执行长 + +2024 年 1 月 14 日 +
diff --git a/public/.nojekyll b/public/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/public/image/China-Open-Source-Report.ico b/public/image/China-Open-Source-Report.ico new file mode 100644 index 0000000..3de38c0 Binary files /dev/null and b/public/image/China-Open-Source-Report.ico differ diff --git a/public/image/China-Open-Source-Report.png b/public/image/China-Open-Source-Report.png new file mode 100644 index 0000000..1cdbddf Binary files /dev/null and b/public/image/China-Open-Source-Report.png differ diff --git a/public/image/commercialization/chapter_2/2-1.png b/public/image/commercialization/chapter_2/2-1.png new file mode 100644 index 0000000..57a081f Binary files /dev/null and b/public/image/commercialization/chapter_2/2-1.png differ diff --git a/public/image/commercialization/chapter_2/2-10.png b/public/image/commercialization/chapter_2/2-10.png new file mode 100644 index 0000000..f171052 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-10.png differ diff --git a/public/image/commercialization/chapter_2/2-11.png b/public/image/commercialization/chapter_2/2-11.png new file mode 100644 index 0000000..83592a8 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-11.png differ diff --git a/public/image/commercialization/chapter_2/2-12.png b/public/image/commercialization/chapter_2/2-12.png new file mode 100644 index 0000000..fe9126f Binary files /dev/null and b/public/image/commercialization/chapter_2/2-12.png differ diff --git a/public/image/commercialization/chapter_2/2-13.png b/public/image/commercialization/chapter_2/2-13.png new file mode 100644 index 0000000..764244c Binary files /dev/null and b/public/image/commercialization/chapter_2/2-13.png differ diff --git a/public/image/commercialization/chapter_2/2-14.png b/public/image/commercialization/chapter_2/2-14.png new file mode 100644 index 0000000..aa3fcc5 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-14.png 
differ diff --git a/public/image/commercialization/chapter_2/2-15.png b/public/image/commercialization/chapter_2/2-15.png new file mode 100644 index 0000000..56cbc27 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-15.png differ diff --git a/public/image/commercialization/chapter_2/2-16.png b/public/image/commercialization/chapter_2/2-16.png new file mode 100644 index 0000000..4cf5dd1 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-16.png differ diff --git a/public/image/commercialization/chapter_2/2-17.png b/public/image/commercialization/chapter_2/2-17.png new file mode 100644 index 0000000..f07d180 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-17.png differ diff --git a/public/image/commercialization/chapter_2/2-18.png b/public/image/commercialization/chapter_2/2-18.png new file mode 100644 index 0000000..ef00ba4 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-18.png differ diff --git a/public/image/commercialization/chapter_2/2-19.png b/public/image/commercialization/chapter_2/2-19.png new file mode 100644 index 0000000..a9b6132 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-19.png differ diff --git a/public/image/commercialization/chapter_2/2-2.png b/public/image/commercialization/chapter_2/2-2.png new file mode 100644 index 0000000..d2e1809 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-2.png differ diff --git a/public/image/commercialization/chapter_2/2-20.png b/public/image/commercialization/chapter_2/2-20.png new file mode 100644 index 0000000..a6fec06 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-20.png differ diff --git a/public/image/commercialization/chapter_2/2-21.png b/public/image/commercialization/chapter_2/2-21.png new file mode 100644 index 0000000..5a00637 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-21.png differ diff --git 
a/public/image/commercialization/chapter_2/2-22.png b/public/image/commercialization/chapter_2/2-22.png new file mode 100644 index 0000000..c040897 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-22.png differ diff --git a/public/image/commercialization/chapter_2/2-23.png b/public/image/commercialization/chapter_2/2-23.png new file mode 100644 index 0000000..b5f0b16 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-23.png differ diff --git a/public/image/commercialization/chapter_2/2-24.png b/public/image/commercialization/chapter_2/2-24.png new file mode 100644 index 0000000..0746de8 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-24.png differ diff --git a/public/image/commercialization/chapter_2/2-25.png b/public/image/commercialization/chapter_2/2-25.png new file mode 100644 index 0000000..cf95372 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-25.png differ diff --git a/public/image/commercialization/chapter_2/2-26.png b/public/image/commercialization/chapter_2/2-26.png new file mode 100644 index 0000000..009363d Binary files /dev/null and b/public/image/commercialization/chapter_2/2-26.png differ diff --git a/public/image/commercialization/chapter_2/2-27.png b/public/image/commercialization/chapter_2/2-27.png new file mode 100644 index 0000000..bd3f386 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-27.png differ diff --git a/public/image/commercialization/chapter_2/2-28.png b/public/image/commercialization/chapter_2/2-28.png new file mode 100644 index 0000000..236119f Binary files /dev/null and b/public/image/commercialization/chapter_2/2-28.png differ diff --git a/public/image/commercialization/chapter_2/2-3.png b/public/image/commercialization/chapter_2/2-3.png new file mode 100644 index 0000000..c4bb26e Binary files /dev/null and b/public/image/commercialization/chapter_2/2-3.png differ diff --git 
a/public/image/commercialization/chapter_2/2-4.png b/public/image/commercialization/chapter_2/2-4.png new file mode 100644 index 0000000..82ee650 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-4.png differ diff --git a/public/image/commercialization/chapter_2/2-5.png b/public/image/commercialization/chapter_2/2-5.png new file mode 100644 index 0000000..93ac132 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-5.png differ diff --git a/public/image/commercialization/chapter_2/2-6.png b/public/image/commercialization/chapter_2/2-6.png new file mode 100644 index 0000000..aa37c34 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-6.png differ diff --git a/public/image/commercialization/chapter_2/2-7.png b/public/image/commercialization/chapter_2/2-7.png new file mode 100644 index 0000000..843fbda Binary files /dev/null and b/public/image/commercialization/chapter_2/2-7.png differ diff --git a/public/image/commercialization/chapter_2/2-8.png b/public/image/commercialization/chapter_2/2-8.png new file mode 100644 index 0000000..7b8ca83 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-8.png differ diff --git a/public/image/commercialization/chapter_2/2-9.png b/public/image/commercialization/chapter_2/2-9.png new file mode 100644 index 0000000..2e576d4 Binary files /dev/null and b/public/image/commercialization/chapter_2/2-9.png differ diff --git a/public/image/commercialization/chapter_3/3-1.png b/public/image/commercialization/chapter_3/3-1.png new file mode 100644 index 0000000..7e0d182 Binary files /dev/null and b/public/image/commercialization/chapter_3/3-1.png differ diff --git a/public/image/commercialization/chapter_3/3-2.png b/public/image/commercialization/chapter_3/3-2.png new file mode 100644 index 0000000..df710d9 Binary files /dev/null and b/public/image/commercialization/chapter_3/3-2.png differ diff --git a/public/image/commercialization/chapter_3/3-3.png 
b/public/image/commercialization/chapter_3/3-3.png new file mode 100644 index 0000000..dcffb76 Binary files /dev/null and b/public/image/commercialization/chapter_3/3-3.png differ diff --git a/public/image/commercialization/chapter_3/3-4.png b/public/image/commercialization/chapter_3/3-4.png new file mode 100644 index 0000000..bf9c3a5 Binary files /dev/null and b/public/image/commercialization/chapter_3/3-4.png differ diff --git a/public/image/commercialization/chapter_3/3-5.png b/public/image/commercialization/chapter_3/3-5.png new file mode 100644 index 0000000..8455933 Binary files /dev/null and b/public/image/commercialization/chapter_3/3-5.png differ diff --git a/public/image/commercialization/chapter_3/3-6.png b/public/image/commercialization/chapter_3/3-6.png new file mode 100644 index 0000000..4242c3f Binary files /dev/null and b/public/image/commercialization/chapter_3/3-6.png differ diff --git a/public/image/commercialization/chapter_3/3-7.png b/public/image/commercialization/chapter_3/3-7.png new file mode 100644 index 0000000..cd849fb Binary files /dev/null and b/public/image/commercialization/chapter_3/3-7.png differ diff --git a/public/image/commercialization/chapter_3/3-8.png b/public/image/commercialization/chapter_3/3-8.png new file mode 100644 index 0000000..3b30ae1 Binary files /dev/null and b/public/image/commercialization/chapter_3/3-8.png differ diff --git a/public/image/commercialization/chapter_3/3-9.png b/public/image/commercialization/chapter_3/3-9.png new file mode 100644 index 0000000..4e4991e Binary files /dev/null and b/public/image/commercialization/chapter_3/3-9.png differ diff --git a/public/image/commercialization/chapter_4/4-1.png b/public/image/commercialization/chapter_4/4-1.png new file mode 100644 index 0000000..4933a82 Binary files /dev/null and b/public/image/commercialization/chapter_4/4-1.png differ diff --git a/public/image/commercialization/chapter_4/4-10.png b/public/image/commercialization/chapter_4/4-10.png new file 
mode 100644 index 0000000..b6446c1 Binary files /dev/null and b/public/image/commercialization/chapter_4/4-10.png differ diff --git a/public/image/commercialization/chapter_4/4-11.png b/public/image/commercialization/chapter_4/4-11.png new file mode 100644 index 0000000..551b5eb Binary files /dev/null and b/public/image/commercialization/chapter_4/4-11.png differ diff --git a/public/image/commercialization/chapter_4/4-12.png b/public/image/commercialization/chapter_4/4-12.png new file mode 100644 index 0000000..f73e708 Binary files /dev/null and b/public/image/commercialization/chapter_4/4-12.png differ diff --git a/public/image/commercialization/chapter_4/4-2.png b/public/image/commercialization/chapter_4/4-2.png new file mode 100644 index 0000000..db29541 Binary files /dev/null and b/public/image/commercialization/chapter_4/4-2.png differ diff --git a/public/image/commercialization/chapter_4/4-3.png b/public/image/commercialization/chapter_4/4-3.png new file mode 100644 index 0000000..46ce56f Binary files /dev/null and b/public/image/commercialization/chapter_4/4-3.png differ diff --git a/public/image/commercialization/chapter_4/4-4.png b/public/image/commercialization/chapter_4/4-4.png new file mode 100644 index 0000000..7985fa3 Binary files /dev/null and b/public/image/commercialization/chapter_4/4-4.png differ diff --git a/public/image/commercialization/chapter_4/4-5.png b/public/image/commercialization/chapter_4/4-5.png new file mode 100644 index 0000000..96addcc Binary files /dev/null and b/public/image/commercialization/chapter_4/4-5.png differ diff --git a/public/image/commercialization/chapter_4/4-6.png b/public/image/commercialization/chapter_4/4-6.png new file mode 100644 index 0000000..05fb1f8 Binary files /dev/null and b/public/image/commercialization/chapter_4/4-6.png differ diff --git a/public/image/commercialization/chapter_4/4-7.png b/public/image/commercialization/chapter_4/4-7.png new file mode 100644 index 0000000..ca0757e Binary files 
/dev/null and b/public/image/commercialization/chapter_4/4-7.png differ diff --git a/public/image/commercialization/chapter_4/4-8.png b/public/image/commercialization/chapter_4/4-8.png new file mode 100644 index 0000000..35e00b7 Binary files /dev/null and b/public/image/commercialization/chapter_4/4-8.png differ diff --git a/public/image/commercialization/chapter_4/4-9.png b/public/image/commercialization/chapter_4/4-9.png new file mode 100644 index 0000000..1a15fb3 Binary files /dev/null and b/public/image/commercialization/chapter_4/4-9.png differ diff --git a/public/image/data/chapter_1/1-1.png b/public/image/data/chapter_1/1-1.png new file mode 100644 index 0000000..846120e Binary files /dev/null and b/public/image/data/chapter_1/1-1.png differ diff --git a/public/image/data/chapter_1/1-10.png b/public/image/data/chapter_1/1-10.png new file mode 100644 index 0000000..161fdf5 Binary files /dev/null and b/public/image/data/chapter_1/1-10.png differ diff --git a/public/image/data/chapter_1/1-11.png b/public/image/data/chapter_1/1-11.png new file mode 100644 index 0000000..59fbba0 Binary files /dev/null and b/public/image/data/chapter_1/1-11.png differ diff --git a/public/image/data/chapter_1/1-12.png b/public/image/data/chapter_1/1-12.png new file mode 100644 index 0000000..2465387 Binary files /dev/null and b/public/image/data/chapter_1/1-12.png differ diff --git a/public/image/data/chapter_1/1-13.png b/public/image/data/chapter_1/1-13.png new file mode 100644 index 0000000..0c84d05 Binary files /dev/null and b/public/image/data/chapter_1/1-13.png differ diff --git a/public/image/data/chapter_1/1-14.png b/public/image/data/chapter_1/1-14.png new file mode 100644 index 0000000..27ab395 Binary files /dev/null and b/public/image/data/chapter_1/1-14.png differ diff --git a/public/image/data/chapter_1/1-15.png b/public/image/data/chapter_1/1-15.png new file mode 100644 index 0000000..899689c Binary files /dev/null and b/public/image/data/chapter_1/1-15.png differ diff 
--git a/public/image/data/chapter_1/1-16.png b/public/image/data/chapter_1/1-16.png new file mode 100644 index 0000000..122c3aa Binary files /dev/null and b/public/image/data/chapter_1/1-16.png differ diff --git a/public/image/data/chapter_1/1-17.png b/public/image/data/chapter_1/1-17.png new file mode 100644 index 0000000..0db76b8 Binary files /dev/null and b/public/image/data/chapter_1/1-17.png differ diff --git a/public/image/data/chapter_1/1-2.png b/public/image/data/chapter_1/1-2.png new file mode 100644 index 0000000..9b0a631 Binary files /dev/null and b/public/image/data/chapter_1/1-2.png differ diff --git a/public/image/data/chapter_1/1-3.png b/public/image/data/chapter_1/1-3.png new file mode 100644 index 0000000..8999e28 Binary files /dev/null and b/public/image/data/chapter_1/1-3.png differ diff --git a/public/image/data/chapter_1/1-4.png b/public/image/data/chapter_1/1-4.png new file mode 100644 index 0000000..b714bf5 Binary files /dev/null and b/public/image/data/chapter_1/1-4.png differ diff --git a/public/image/data/chapter_1/1-5.png b/public/image/data/chapter_1/1-5.png new file mode 100644 index 0000000..421356f Binary files /dev/null and b/public/image/data/chapter_1/1-5.png differ diff --git a/public/image/data/chapter_1/1-6.png b/public/image/data/chapter_1/1-6.png new file mode 100644 index 0000000..a8967c6 Binary files /dev/null and b/public/image/data/chapter_1/1-6.png differ diff --git a/public/image/data/chapter_1/1-7.png b/public/image/data/chapter_1/1-7.png new file mode 100644 index 0000000..1d009a6 Binary files /dev/null and b/public/image/data/chapter_1/1-7.png differ diff --git a/public/image/data/chapter_1/1-8.png b/public/image/data/chapter_1/1-8.png new file mode 100644 index 0000000..49bbaa9 Binary files /dev/null and b/public/image/data/chapter_1/1-8.png differ diff --git a/public/image/data/chapter_1/1-9.png b/public/image/data/chapter_1/1-9.png new file mode 100644 index 0000000..889c6be Binary files /dev/null and 
b/public/image/data/chapter_1/1-9.png differ diff --git a/public/image/data/chapter_2/2-1.png b/public/image/data/chapter_2/2-1.png new file mode 100644 index 0000000..35eab7d Binary files /dev/null and b/public/image/data/chapter_2/2-1.png differ diff --git a/public/image/data/chapter_2/2-2.png b/public/image/data/chapter_2/2-2.png new file mode 100644 index 0000000..363350a Binary files /dev/null and b/public/image/data/chapter_2/2-2.png differ diff --git a/public/image/data/chapter_2/2-3.png b/public/image/data/chapter_2/2-3.png new file mode 100644 index 0000000..1bfa39c Binary files /dev/null and b/public/image/data/chapter_2/2-3.png differ diff --git a/public/image/data/chapter_2/2-4.png b/public/image/data/chapter_2/2-4.png new file mode 100644 index 0000000..e27af4b Binary files /dev/null and b/public/image/data/chapter_2/2-4.png differ diff --git a/public/image/data/chapter_2/2-5.png b/public/image/data/chapter_2/2-5.png new file mode 100644 index 0000000..8863f08 Binary files /dev/null and b/public/image/data/chapter_2/2-5.png differ diff --git a/public/image/data/chapter_2/2-6.png b/public/image/data/chapter_2/2-6.png new file mode 100644 index 0000000..d06e7f9 Binary files /dev/null and b/public/image/data/chapter_2/2-6.png differ diff --git a/public/image/data/chapter_2/2-7.png b/public/image/data/chapter_2/2-7.png new file mode 100644 index 0000000..33939ee Binary files /dev/null and b/public/image/data/chapter_2/2-7.png differ diff --git a/public/image/data/chapter_2/2-8.png b/public/image/data/chapter_2/2-8.png new file mode 100644 index 0000000..9c6cc08 Binary files /dev/null and b/public/image/data/chapter_2/2-8.png differ diff --git a/public/image/data/chapter_3/3-1.png b/public/image/data/chapter_3/3-1.png new file mode 100644 index 0000000..c76bbdd Binary files /dev/null and b/public/image/data/chapter_3/3-1.png differ diff --git a/public/image/data/chapter_3/3-2.png b/public/image/data/chapter_3/3-2.png new file mode 100644 index 
0000000..3478a3d Binary files /dev/null and b/public/image/data/chapter_3/3-2.png differ diff --git a/public/image/data/chapter_3/3-3.png b/public/image/data/chapter_3/3-3.png new file mode 100644 index 0000000..071a596 Binary files /dev/null and b/public/image/data/chapter_3/3-3.png differ diff --git a/public/image/data/chapter_3/3-4.png b/public/image/data/chapter_3/3-4.png new file mode 100644 index 0000000..b88a961 Binary files /dev/null and b/public/image/data/chapter_3/3-4.png differ diff --git a/public/image/data/chapter_4/4-1.png b/public/image/data/chapter_4/4-1.png new file mode 100644 index 0000000..1d5d4d1 Binary files /dev/null and b/public/image/data/chapter_4/4-1.png differ diff --git a/public/image/data/chapter_4/4-2.png b/public/image/data/chapter_4/4-2.png new file mode 100644 index 0000000..1cb4f91 Binary files /dev/null and b/public/image/data/chapter_4/4-2.png differ diff --git a/public/image/data/chapter_4/4-3.png b/public/image/data/chapter_4/4-3.png new file mode 100644 index 0000000..2be18db Binary files /dev/null and b/public/image/data/chapter_4/4-3.png differ diff --git a/public/image/data/chapter_4/4-4.png b/public/image/data/chapter_4/4-4.png new file mode 100644 index 0000000..d217928 Binary files /dev/null and b/public/image/data/chapter_4/4-4.png differ diff --git a/public/image/data/chapter_5/5-1.png b/public/image/data/chapter_5/5-1.png new file mode 100644 index 0000000..5790781 Binary files /dev/null and b/public/image/data/chapter_5/5-1.png differ diff --git a/public/image/data/chapter_5/5-10.png b/public/image/data/chapter_5/5-10.png new file mode 100644 index 0000000..4e1d7de Binary files /dev/null and b/public/image/data/chapter_5/5-10.png differ diff --git a/public/image/data/chapter_5/5-11.png b/public/image/data/chapter_5/5-11.png new file mode 100644 index 0000000..ed55f30 Binary files /dev/null and b/public/image/data/chapter_5/5-11.png differ diff --git a/public/image/data/chapter_5/5-12.png 
b/public/image/data/chapter_5/5-12.png new file mode 100644 index 0000000..2c355c9 Binary files /dev/null and b/public/image/data/chapter_5/5-12.png differ diff --git a/public/image/data/chapter_5/5-13.png b/public/image/data/chapter_5/5-13.png new file mode 100644 index 0000000..5f9a711 Binary files /dev/null and b/public/image/data/chapter_5/5-13.png differ diff --git a/public/image/data/chapter_5/5-14.png b/public/image/data/chapter_5/5-14.png new file mode 100644 index 0000000..c7ab773 Binary files /dev/null and b/public/image/data/chapter_5/5-14.png differ diff --git a/public/image/data/chapter_5/5-2.png b/public/image/data/chapter_5/5-2.png new file mode 100644 index 0000000..fd2b1bd Binary files /dev/null and b/public/image/data/chapter_5/5-2.png differ diff --git a/public/image/data/chapter_5/5-3.png b/public/image/data/chapter_5/5-3.png new file mode 100644 index 0000000..0f8f74d Binary files /dev/null and b/public/image/data/chapter_5/5-3.png differ diff --git a/public/image/data/chapter_5/5-4.png b/public/image/data/chapter_5/5-4.png new file mode 100644 index 0000000..b382709 Binary files /dev/null and b/public/image/data/chapter_5/5-4.png differ diff --git a/public/image/data/chapter_5/5-5.png b/public/image/data/chapter_5/5-5.png new file mode 100644 index 0000000..7dddf4d Binary files /dev/null and b/public/image/data/chapter_5/5-5.png differ diff --git a/public/image/data/chapter_5/5-6.png b/public/image/data/chapter_5/5-6.png new file mode 100644 index 0000000..8ea095d Binary files /dev/null and b/public/image/data/chapter_5/5-6.png differ diff --git a/public/image/data/chapter_5/5-7.png b/public/image/data/chapter_5/5-7.png new file mode 100644 index 0000000..98429e8 Binary files /dev/null and b/public/image/data/chapter_5/5-7.png differ diff --git a/public/image/data/chapter_5/5-8.png b/public/image/data/chapter_5/5-8.png new file mode 100644 index 0000000..cfc8682 Binary files /dev/null and b/public/image/data/chapter_5/5-8.png differ diff --git 
a/public/image/data/chapter_5/5-9.png b/public/image/data/chapter_5/5-9.png new file mode 100644 index 0000000..0ae7639 Binary files /dev/null and b/public/image/data/chapter_5/5-9.png differ diff --git a/public/image/data/chapter_6/6-1.png b/public/image/data/chapter_6/6-1.png new file mode 100644 index 0000000..cfdd879 Binary files /dev/null and b/public/image/data/chapter_6/6-1.png differ diff --git a/public/image/data/chapter_6/6-10.png b/public/image/data/chapter_6/6-10.png new file mode 100644 index 0000000..01b4e62 Binary files /dev/null and b/public/image/data/chapter_6/6-10.png differ diff --git a/public/image/data/chapter_6/6-11.png b/public/image/data/chapter_6/6-11.png new file mode 100644 index 0000000..df07485 Binary files /dev/null and b/public/image/data/chapter_6/6-11.png differ diff --git a/public/image/data/chapter_6/6-12.png b/public/image/data/chapter_6/6-12.png new file mode 100644 index 0000000..8f9d10d Binary files /dev/null and b/public/image/data/chapter_6/6-12.png differ diff --git a/public/image/data/chapter_6/6-13.png b/public/image/data/chapter_6/6-13.png new file mode 100644 index 0000000..8c7dc34 Binary files /dev/null and b/public/image/data/chapter_6/6-13.png differ diff --git a/public/image/data/chapter_6/6-14.png b/public/image/data/chapter_6/6-14.png new file mode 100644 index 0000000..4905c13 Binary files /dev/null and b/public/image/data/chapter_6/6-14.png differ diff --git a/public/image/data/chapter_6/6-15.png b/public/image/data/chapter_6/6-15.png new file mode 100644 index 0000000..8c2734f Binary files /dev/null and b/public/image/data/chapter_6/6-15.png differ diff --git a/public/image/data/chapter_6/6-16.png b/public/image/data/chapter_6/6-16.png new file mode 100644 index 0000000..3d26da0 Binary files /dev/null and b/public/image/data/chapter_6/6-16.png differ diff --git a/public/image/data/chapter_6/6-17.png b/public/image/data/chapter_6/6-17.png new file mode 100644 index 0000000..3564a5d Binary files /dev/null and 
b/public/image/data/chapter_6/6-17.png differ diff --git a/public/image/data/chapter_6/6-18.png b/public/image/data/chapter_6/6-18.png new file mode 100644 index 0000000..1f4eedb Binary files /dev/null and b/public/image/data/chapter_6/6-18.png differ diff --git a/public/image/data/chapter_6/6-19.png b/public/image/data/chapter_6/6-19.png new file mode 100644 index 0000000..418a2ac Binary files /dev/null and b/public/image/data/chapter_6/6-19.png differ diff --git a/public/image/data/chapter_6/6-2.png b/public/image/data/chapter_6/6-2.png new file mode 100644 index 0000000..15e3ba6 Binary files /dev/null and b/public/image/data/chapter_6/6-2.png differ diff --git a/public/image/data/chapter_6/6-20.png b/public/image/data/chapter_6/6-20.png new file mode 100644 index 0000000..460a6da Binary files /dev/null and b/public/image/data/chapter_6/6-20.png differ diff --git a/public/image/data/chapter_6/6-21.png b/public/image/data/chapter_6/6-21.png new file mode 100644 index 0000000..e2b28df Binary files /dev/null and b/public/image/data/chapter_6/6-21.png differ diff --git a/public/image/data/chapter_6/6-3.png b/public/image/data/chapter_6/6-3.png new file mode 100644 index 0000000..cf7d0ca Binary files /dev/null and b/public/image/data/chapter_6/6-3.png differ diff --git a/public/image/data/chapter_6/6-4.png b/public/image/data/chapter_6/6-4.png new file mode 100644 index 0000000..86146ac Binary files /dev/null and b/public/image/data/chapter_6/6-4.png differ diff --git a/public/image/data/chapter_6/6-5.png b/public/image/data/chapter_6/6-5.png new file mode 100644 index 0000000..a255bf2 Binary files /dev/null and b/public/image/data/chapter_6/6-5.png differ diff --git a/public/image/data/chapter_6/6-6.png b/public/image/data/chapter_6/6-6.png new file mode 100644 index 0000000..795542e Binary files /dev/null and b/public/image/data/chapter_6/6-6.png differ diff --git a/public/image/data/chapter_6/6-7.png b/public/image/data/chapter_6/6-7.png new file mode 100644 index 
0000000..4372aad Binary files /dev/null and b/public/image/data/chapter_6/6-7.png differ diff --git a/public/image/data/chapter_6/6-8.png b/public/image/data/chapter_6/6-8.png new file mode 100644 index 0000000..bac5dc7 Binary files /dev/null and b/public/image/data/chapter_6/6-8.png differ diff --git a/public/image/data/chapter_6/6-9.png b/public/image/data/chapter_6/6-9.png new file mode 100644 index 0000000..447c211 Binary files /dev/null and b/public/image/data/chapter_6/6-9.png differ diff --git a/public/image/data/chapter_7/7-1.png b/public/image/data/chapter_7/7-1.png new file mode 100644 index 0000000..8702ae6 Binary files /dev/null and b/public/image/data/chapter_7/7-1.png differ diff --git a/public/image/data/chapter_7/7-10.png b/public/image/data/chapter_7/7-10.png new file mode 100644 index 0000000..80dfc93 Binary files /dev/null and b/public/image/data/chapter_7/7-10.png differ diff --git a/public/image/data/chapter_7/7-11.png b/public/image/data/chapter_7/7-11.png new file mode 100644 index 0000000..be96559 Binary files /dev/null and b/public/image/data/chapter_7/7-11.png differ diff --git a/public/image/data/chapter_7/7-12.png b/public/image/data/chapter_7/7-12.png new file mode 100644 index 0000000..c9fb977 Binary files /dev/null and b/public/image/data/chapter_7/7-12.png differ diff --git a/public/image/data/chapter_7/7-13.png b/public/image/data/chapter_7/7-13.png new file mode 100644 index 0000000..bfdfcfd Binary files /dev/null and b/public/image/data/chapter_7/7-13.png differ diff --git a/public/image/data/chapter_7/7-14.png b/public/image/data/chapter_7/7-14.png new file mode 100644 index 0000000..0b8f57a Binary files /dev/null and b/public/image/data/chapter_7/7-14.png differ diff --git a/public/image/data/chapter_7/7-15.png b/public/image/data/chapter_7/7-15.png new file mode 100644 index 0000000..1fd150d Binary files /dev/null and b/public/image/data/chapter_7/7-15.png differ diff --git a/public/image/data/chapter_7/7-16.png 
b/public/image/data/chapter_7/7-16.png new file mode 100644 index 0000000..f8d342e Binary files /dev/null and b/public/image/data/chapter_7/7-16.png differ diff --git a/public/image/data/chapter_7/7-17.png b/public/image/data/chapter_7/7-17.png new file mode 100644 index 0000000..979e1e7 Binary files /dev/null and b/public/image/data/chapter_7/7-17.png differ diff --git a/public/image/data/chapter_7/7-18.png b/public/image/data/chapter_7/7-18.png new file mode 100644 index 0000000..e76ce5e Binary files /dev/null and b/public/image/data/chapter_7/7-18.png differ diff --git a/public/image/data/chapter_7/7-2.png b/public/image/data/chapter_7/7-2.png new file mode 100644 index 0000000..2c21609 Binary files /dev/null and b/public/image/data/chapter_7/7-2.png differ diff --git a/public/image/data/chapter_7/7-20.png b/public/image/data/chapter_7/7-20.png new file mode 100644 index 0000000..7ab7418 Binary files /dev/null and b/public/image/data/chapter_7/7-20.png differ diff --git a/public/image/data/chapter_7/7-21.png b/public/image/data/chapter_7/7-21.png new file mode 100644 index 0000000..613f4d9 Binary files /dev/null and b/public/image/data/chapter_7/7-21.png differ diff --git a/public/image/data/chapter_7/7-22.png b/public/image/data/chapter_7/7-22.png new file mode 100644 index 0000000..07d95e2 Binary files /dev/null and b/public/image/data/chapter_7/7-22.png differ diff --git a/public/image/data/chapter_7/7-23.png b/public/image/data/chapter_7/7-23.png new file mode 100644 index 0000000..9bec3fa Binary files /dev/null and b/public/image/data/chapter_7/7-23.png differ diff --git a/public/image/data/chapter_7/7-24.png b/public/image/data/chapter_7/7-24.png new file mode 100644 index 0000000..cff1fe3 Binary files /dev/null and b/public/image/data/chapter_7/7-24.png differ diff --git a/public/image/data/chapter_7/7-25.png b/public/image/data/chapter_7/7-25.png new file mode 100644 index 0000000..19c5953 Binary files /dev/null and b/public/image/data/chapter_7/7-25.png 
differ diff --git a/public/image/data/chapter_7/7-3.png b/public/image/data/chapter_7/7-3.png new file mode 100644 index 0000000..f7f9cf5 Binary files /dev/null and b/public/image/data/chapter_7/7-3.png differ diff --git a/public/image/data/chapter_7/7-4.png b/public/image/data/chapter_7/7-4.png new file mode 100644 index 0000000..80e8752 Binary files /dev/null and b/public/image/data/chapter_7/7-4.png differ diff --git a/public/image/data/chapter_7/7-5.png b/public/image/data/chapter_7/7-5.png new file mode 100644 index 0000000..fd54539 Binary files /dev/null and b/public/image/data/chapter_7/7-5.png differ diff --git a/public/image/data/chapter_7/7-6.png b/public/image/data/chapter_7/7-6.png new file mode 100644 index 0000000..7a79597 Binary files /dev/null and b/public/image/data/chapter_7/7-6.png differ diff --git a/public/image/data/chapter_7/7-7.png b/public/image/data/chapter_7/7-7.png new file mode 100644 index 0000000..7e9ac33 Binary files /dev/null and b/public/image/data/chapter_7/7-7.png differ diff --git a/public/image/data/chapter_7/7-8.png b/public/image/data/chapter_7/7-8.png new file mode 100644 index 0000000..86891a0 Binary files /dev/null and b/public/image/data/chapter_7/7-8.png differ diff --git a/public/image/data/chapter_7/7-9.png b/public/image/data/chapter_7/7-9.png new file mode 100644 index 0000000..9d0f0bd Binary files /dev/null and b/public/image/data/chapter_7/7-9.png differ diff --git a/public/image/data/chapter_8/8-1.png b/public/image/data/chapter_8/8-1.png new file mode 100644 index 0000000..f9564dc Binary files /dev/null and b/public/image/data/chapter_8/8-1.png differ diff --git a/public/image/data/chapter_8/8-2.png b/public/image/data/chapter_8/8-2.png new file mode 100644 index 0000000..9f877a7 Binary files /dev/null and b/public/image/data/chapter_8/8-2.png differ diff --git a/public/image/data/chapter_8/8-3.png b/public/image/data/chapter_8/8-3.png new file mode 100644 index 0000000..995ea38 Binary files /dev/null and 
b/public/image/data/chapter_8/8-3.png differ diff --git a/public/image/home/KaiYuanShe-logo.png b/public/image/home/KaiYuanShe-logo.png new file mode 100644 index 0000000..609d908 Binary files /dev/null and b/public/image/home/KaiYuanShe-logo.png differ diff --git a/public/image/home/avatar/INP.png b/public/image/home/avatar/INP.png new file mode 100644 index 0000000..07716aa Binary files /dev/null and b/public/image/home/avatar/INP.png differ diff --git "a/public/image/home/avatar/\344\270\201\346\226\207\346\230\212.png" "b/public/image/home/avatar/\344\270\201\346\226\207\346\230\212.png" new file mode 100644 index 0000000..b54cfea Binary files /dev/null and "b/public/image/home/avatar/\344\270\201\346\226\207\346\230\212.png" differ diff --git "a/public/image/home/avatar/\344\272\221\345\220\257\350\265\204\346\234\254.jpg" "b/public/image/home/avatar/\344\272\221\345\220\257\350\265\204\346\234\254.jpg" new file mode 100644 index 0000000..bb48235 Binary files /dev/null and "b/public/image/home/avatar/\344\272\221\345\220\257\350\265\204\346\234\254.jpg" differ diff --git "a/public/image/home/avatar/\344\274\215\346\263\260\347\202\234.jpg" "b/public/image/home/avatar/\344\274\215\346\263\260\347\202\234.jpg" new file mode 100644 index 0000000..d99ab0f Binary files /dev/null and "b/public/image/home/avatar/\344\274\215\346\263\260\347\202\234.jpg" differ diff --git "a/public/image/home/avatar/\345\210\230\345\244\251\346\240\213.jpg" "b/public/image/home/avatar/\345\210\230\345\244\251\346\240\213.jpg" new file mode 100644 index 0000000..28c9bcc Binary files /dev/null and "b/public/image/home/avatar/\345\210\230\345\244\251\346\240\213.jpg" differ diff --git "a/public/image/home/avatar/\345\224\220\347\203\250\347\224\267.png" "b/public/image/home/avatar/\345\224\220\347\203\250\347\224\267.png" new file mode 100644 index 0000000..96b2b0d Binary files /dev/null and "b/public/image/home/avatar/\345\224\220\347\203\250\347\224\267.png" differ diff --git 
"a/public/image/home/avatar/\345\244\217\345\260\217\351\233\205.jpg" "b/public/image/home/avatar/\345\244\217\345\260\217\351\233\205.jpg" new file mode 100644 index 0000000..5d89435 Binary files /dev/null and "b/public/image/home/avatar/\345\244\217\345\260\217\351\233\205.jpg" differ diff --git "a/public/image/home/avatar/\345\250\204\346\263\275\345\215\216.jpg" "b/public/image/home/avatar/\345\250\204\346\263\275\345\215\216.jpg" new file mode 100644 index 0000000..a97c2c8 Binary files /dev/null and "b/public/image/home/avatar/\345\250\204\346\263\275\345\215\216.jpg" differ diff --git "a/public/image/home/avatar/\345\256\201\345\277\227\346\210\220.jpg" "b/public/image/home/avatar/\345\256\201\345\277\227\346\210\220.jpg" new file mode 100644 index 0000000..947bebb Binary files /dev/null and "b/public/image/home/avatar/\345\256\201\345\277\227\346\210\220.jpg" differ diff --git "a/public/image/home/avatar/\345\272\204\350\241\250\344\274\237.jpg" "b/public/image/home/avatar/\345\272\204\350\241\250\344\274\237.jpg" new file mode 100644 index 0000000..dc688a1 Binary files /dev/null and "b/public/image/home/avatar/\345\272\204\350\241\250\344\274\237.jpg" differ diff --git "a/public/image/home/avatar/\345\274\240\346\254\243\347\204\266.jpg" "b/public/image/home/avatar/\345\274\240\346\254\243\347\204\266.jpg" new file mode 100644 index 0000000..05b00c1 Binary files /dev/null and "b/public/image/home/avatar/\345\274\240\346\254\243\347\204\266.jpg" differ diff --git "a/public/image/home/avatar/\345\274\240\347\277\224\345\256\207.jpg" "b/public/image/home/avatar/\345\274\240\347\277\224\345\256\207.jpg" new file mode 100644 index 0000000..60a263e Binary files /dev/null and "b/public/image/home/avatar/\345\274\240\347\277\224\345\256\207.jpg" differ diff --git "a/public/image/home/avatar/\346\234\261\345\277\227\347\202\234.jpg" "b/public/image/home/avatar/\346\234\261\345\277\227\347\202\234.jpg" new file mode 100644 index 0000000..c5a2de6 Binary files 
/dev/null and "b/public/image/home/avatar/\346\234\261\345\277\227\347\202\234.jpg" differ diff --git "a/public/image/home/avatar/\346\235\216\346\230\216\345\272\267.jpg" "b/public/image/home/avatar/\346\235\216\346\230\216\345\272\267.jpg" new file mode 100644 index 0000000..39517cc Binary files /dev/null and "b/public/image/home/avatar/\346\235\216\346\230\216\345\272\267.jpg" differ diff --git "a/public/image/home/avatar/\346\235\216\351\270\277\346\226\214.jpg" "b/public/image/home/avatar/\346\235\216\351\270\277\346\226\214.jpg" new file mode 100644 index 0000000..988f96a Binary files /dev/null and "b/public/image/home/avatar/\346\235\216\351\270\277\346\226\214.jpg" differ diff --git "a/public/image/home/avatar/\346\242\201\345\260\247.jpg" "b/public/image/home/avatar/\346\242\201\345\260\247.jpg" new file mode 100644 index 0000000..db6c203 Binary files /dev/null and "b/public/image/home/avatar/\346\242\201\345\260\247.jpg" differ diff --git "a/public/image/home/avatar/\346\257\225\346\236\253\346\236\227.jpg" "b/public/image/home/avatar/\346\257\225\346\236\253\346\236\227.jpg" new file mode 100644 index 0000000..699b08c Binary files /dev/null and "b/public/image/home/avatar/\346\257\225\346\236\253\346\236\227.jpg" differ diff --git "a/public/image/home/avatar/\347\216\213\344\274\237.jpg" "b/public/image/home/avatar/\347\216\213\344\274\237.jpg" new file mode 100644 index 0000000..9e98db2 Binary files /dev/null and "b/public/image/home/avatar/\347\216\213\344\274\237.jpg" differ diff --git "a/public/image/home/avatar/\347\216\213\345\251\225.jpg" "b/public/image/home/avatar/\347\216\213\345\251\225.jpg" new file mode 100644 index 0000000..b98e122 Binary files /dev/null and "b/public/image/home/avatar/\347\216\213\345\251\225.jpg" differ diff --git "a/public/image/home/avatar/\347\216\213\350\223\211.png" "b/public/image/home/avatar/\347\216\213\350\223\211.png" new file mode 100644 index 0000000..3de38c0 Binary files /dev/null and 
"b/public/image/home/avatar/\347\216\213\350\223\211.png" differ diff --git "a/public/image/home/avatar/\350\242\201\346\273\232\346\273\232.jpg" "b/public/image/home/avatar/\350\242\201\346\273\232\346\273\232.jpg" new file mode 100644 index 0000000..72896e5 Binary files /dev/null and "b/public/image/home/avatar/\350\242\201\346\273\232\346\273\232.jpg" differ diff --git "a/public/image/home/avatar/\350\265\265\347\224\237\345\256\207.jpg" "b/public/image/home/avatar/\350\265\265\347\224\237\345\256\207.jpg" new file mode 100644 index 0000000..1a8a69c Binary files /dev/null and "b/public/image/home/avatar/\350\265\265\347\224\237\345\256\207.jpg" differ diff --git "a/public/image/home/avatar/\351\237\251\345\207\241\345\256\207.jpg" "b/public/image/home/avatar/\351\237\251\345\207\241\345\256\207.jpg" new file mode 100644 index 0000000..33a6151 Binary files /dev/null and "b/public/image/home/avatar/\351\237\251\345\207\241\345\256\207.jpg" differ diff --git "a/public/image/home/avatar/\351\273\204\346\270\251\347\221\236.jpg" "b/public/image/home/avatar/\351\273\204\346\270\251\347\221\236.jpg" new file mode 100644 index 0000000..b2fb930 Binary files /dev/null and "b/public/image/home/avatar/\351\273\204\346\270\251\347\221\236.jpg" differ diff --git a/public/image/home/csdn_logo.jpg b/public/image/home/csdn_logo.jpg new file mode 100644 index 0000000..7769be4 Binary files /dev/null and b/public/image/home/csdn_logo.jpg differ diff --git a/public/image/home/sf_logo.png b/public/image/home/sf_logo.png new file mode 100644 index 0000000..565f8ef Binary files /dev/null and b/public/image/home/sf_logo.png differ diff --git a/public/image/home/x_lab2017_logo.jpg b/public/image/home/x_lab2017_logo.jpg new file mode 100644 index 0000000..6c88ef9 Binary files /dev/null and b/public/image/home/x_lab2017_logo.jpg differ diff --git a/public/image/home/yunqi_partnets_logo.jpg b/public/image/home/yunqi_partnets_logo.jpg new file mode 100644 index 0000000..bb48235 Binary files 
/dev/null and b/public/image/home/yunqi_partnets_logo.jpg differ diff --git a/public/image/questionnaire/3.1-1.png b/public/image/questionnaire/3.1-1.png new file mode 100644 index 0000000..f42eac0 Binary files /dev/null and b/public/image/questionnaire/3.1-1.png differ diff --git a/public/image/questionnaire/3.1-2.png b/public/image/questionnaire/3.1-2.png new file mode 100644 index 0000000..417fd7f Binary files /dev/null and b/public/image/questionnaire/3.1-2.png differ diff --git a/public/image/questionnaire/3.1-3.png b/public/image/questionnaire/3.1-3.png new file mode 100644 index 0000000..f320d2e Binary files /dev/null and b/public/image/questionnaire/3.1-3.png differ diff --git a/public/image/questionnaire/3.1-4.png b/public/image/questionnaire/3.1-4.png new file mode 100644 index 0000000..fada533 Binary files /dev/null and b/public/image/questionnaire/3.1-4.png differ diff --git a/public/image/questionnaire/3.1-5.png b/public/image/questionnaire/3.1-5.png new file mode 100644 index 0000000..32b0307 Binary files /dev/null and b/public/image/questionnaire/3.1-5.png differ diff --git a/public/image/questionnaire/3.1-6.png b/public/image/questionnaire/3.1-6.png new file mode 100644 index 0000000..3845c68 Binary files /dev/null and b/public/image/questionnaire/3.1-6.png differ diff --git a/public/image/questionnaire/3.1-7.png b/public/image/questionnaire/3.1-7.png new file mode 100644 index 0000000..4dd46ec Binary files /dev/null and b/public/image/questionnaire/3.1-7.png differ diff --git a/public/image/questionnaire/3.1-8.png b/public/image/questionnaire/3.1-8.png new file mode 100644 index 0000000..ba3d12b Binary files /dev/null and b/public/image/questionnaire/3.1-8.png differ diff --git a/public/image/questionnaire/3.1-9.png b/public/image/questionnaire/3.1-9.png new file mode 100644 index 0000000..9b018dc Binary files /dev/null and b/public/image/questionnaire/3.1-9.png differ diff --git a/public/image/questionnaire/3.2-1.png 
b/public/image/questionnaire/3.2-1.png new file mode 100644 index 0000000..b7fac7f Binary files /dev/null and b/public/image/questionnaire/3.2-1.png differ diff --git a/public/image/questionnaire/3.2-10.png b/public/image/questionnaire/3.2-10.png new file mode 100644 index 0000000..2d5a806 Binary files /dev/null and b/public/image/questionnaire/3.2-10.png differ diff --git a/public/image/questionnaire/3.2-2.png b/public/image/questionnaire/3.2-2.png new file mode 100644 index 0000000..8dde9e2 Binary files /dev/null and b/public/image/questionnaire/3.2-2.png differ diff --git a/public/image/questionnaire/3.2-3.png b/public/image/questionnaire/3.2-3.png new file mode 100644 index 0000000..dc1d863 Binary files /dev/null and b/public/image/questionnaire/3.2-3.png differ diff --git a/public/image/questionnaire/3.2-4.png b/public/image/questionnaire/3.2-4.png new file mode 100644 index 0000000..bcea9fb Binary files /dev/null and b/public/image/questionnaire/3.2-4.png differ diff --git a/public/image/questionnaire/3.2-5.png b/public/image/questionnaire/3.2-5.png new file mode 100644 index 0000000..2327011 Binary files /dev/null and b/public/image/questionnaire/3.2-5.png differ diff --git a/public/image/questionnaire/3.2-6.png b/public/image/questionnaire/3.2-6.png new file mode 100644 index 0000000..2290c98 Binary files /dev/null and b/public/image/questionnaire/3.2-6.png differ diff --git a/public/image/questionnaire/3.2-7.png b/public/image/questionnaire/3.2-7.png new file mode 100644 index 0000000..e2386ae Binary files /dev/null and b/public/image/questionnaire/3.2-7.png differ diff --git a/public/image/questionnaire/3.2-8.png b/public/image/questionnaire/3.2-8.png new file mode 100644 index 0000000..e4c7708 Binary files /dev/null and b/public/image/questionnaire/3.2-8.png differ diff --git a/public/image/questionnaire/3.2-9.png b/public/image/questionnaire/3.2-9.png new file mode 100644 index 0000000..abd0cdc Binary files /dev/null and 
b/public/image/questionnaire/3.2-9.png differ diff --git a/public/image/questionnaire/3.3-1.png b/public/image/questionnaire/3.3-1.png new file mode 100644 index 0000000..f24f4ea Binary files /dev/null and b/public/image/questionnaire/3.3-1.png differ diff --git a/public/image/questionnaire/3.3-2.png b/public/image/questionnaire/3.3-2.png new file mode 100644 index 0000000..8d76f3e Binary files /dev/null and b/public/image/questionnaire/3.3-2.png differ diff --git a/public/image/questionnaire/3.3-3.png b/public/image/questionnaire/3.3-3.png new file mode 100644 index 0000000..95a441c Binary files /dev/null and b/public/image/questionnaire/3.3-3.png differ diff --git a/public/image/questionnaire/3.3-4.png b/public/image/questionnaire/3.3-4.png new file mode 100644 index 0000000..86b66e7 Binary files /dev/null and b/public/image/questionnaire/3.3-4.png differ diff --git a/public/image/questionnaire/3.3-5.png b/public/image/questionnaire/3.3-5.png new file mode 100644 index 0000000..2dcc2aa Binary files /dev/null and b/public/image/questionnaire/3.3-5.png differ diff --git a/public/image/questionnaire/3.3-6.png b/public/image/questionnaire/3.3-6.png new file mode 100644 index 0000000..aa092dd Binary files /dev/null and b/public/image/questionnaire/3.3-6.png differ diff --git a/public/image/questionnaire/3.3-7.png b/public/image/questionnaire/3.3-7.png new file mode 100644 index 0000000..26a5f88 Binary files /dev/null and b/public/image/questionnaire/3.3-7.png differ diff --git a/public/image/questionnaire/3.3-8.png b/public/image/questionnaire/3.3-8.png new file mode 100644 index 0000000..795c0ed Binary files /dev/null and b/public/image/questionnaire/3.3-8.png differ diff --git a/public/image/questionnaire/3.4-1.png b/public/image/questionnaire/3.4-1.png new file mode 100644 index 0000000..4711de4 Binary files /dev/null and b/public/image/questionnaire/3.4-1.png differ diff --git a/public/image/questionnaire/3.4-2.png b/public/image/questionnaire/3.4-2.png new file 
mode 100644 index 0000000..6edeed5 Binary files /dev/null and b/public/image/questionnaire/3.4-2.png differ diff --git a/public/image/questionnaire/3.4-3.png b/public/image/questionnaire/3.4-3.png new file mode 100644 index 0000000..a4223ff Binary files /dev/null and b/public/image/questionnaire/3.4-3.png differ diff --git a/public/image/questionnaire/3.4-4.png b/public/image/questionnaire/3.4-4.png new file mode 100644 index 0000000..6447f5c Binary files /dev/null and b/public/image/questionnaire/3.4-4.png differ diff --git a/public/image/questionnaire/3.4-5.png b/public/image/questionnaire/3.4-5.png new file mode 100644 index 0000000..73036a9 Binary files /dev/null and b/public/image/questionnaire/3.4-5.png differ diff --git a/public/image/questionnaire/3.4-6.png b/public/image/questionnaire/3.4-6.png new file mode 100644 index 0000000..2362c14 Binary files /dev/null and b/public/image/questionnaire/3.4-6.png differ diff --git a/public/image/questionnaire/3.5-1.png b/public/image/questionnaire/3.5-1.png new file mode 100644 index 0000000..275661b Binary files /dev/null and b/public/image/questionnaire/3.5-1.png differ diff --git a/public/image/questionnaire/3.5-2.png b/public/image/questionnaire/3.5-2.png new file mode 100644 index 0000000..53b4285 Binary files /dev/null and b/public/image/questionnaire/3.5-2.png differ diff --git a/public/image/questionnaire/3.5-3.png b/public/image/questionnaire/3.5-3.png new file mode 100644 index 0000000..2f64c62 Binary files /dev/null and b/public/image/questionnaire/3.5-3.png differ diff --git a/public/image/questionnaire/3.5-4.png b/public/image/questionnaire/3.5-4.png new file mode 100644 index 0000000..0f6ac72 Binary files /dev/null and b/public/image/questionnaire/3.5-4.png differ diff --git a/public/image/questionnaire/3.5-5.png b/public/image/questionnaire/3.5-5.png new file mode 100644 index 0000000..488295c Binary files /dev/null and b/public/image/questionnaire/3.5-5.png differ diff --git 
a/public/image/questionnaire/3.5-6.png b/public/image/questionnaire/3.5-6.png new file mode 100644 index 0000000..bf2cff9 Binary files /dev/null and b/public/image/questionnaire/3.5-6.png differ diff --git a/questionnaire.md b/questionnaire.md new file mode 100644 index 0000000..4af96a4 --- /dev/null +++ b/questionnaire.md @@ -0,0 +1,260 @@ +--- +outline: deep +--- + +# 问卷篇 + +## 一. 报告背景 + +延续自 2016 年初发布的《 2015 年中国开源社区参与调查报告》以来的传统,2023 年末,我们再次启动了中国开源社区的年度参与调查,致力于通过持续的开发者调查报告,以多维度方式呈现中国开源发展的整体状况。借助数据分析手段和调查报告等工具,我们成功绘制了一份关于 2023 年中国开源世界的地图。 + +本问卷篇对受访者的多个角色进行提问,旨在从各个维度深入了解社区的发展趋势。本问卷根据开源社区参与程度将受访者分为使用者、参与者、贡献者、维护者、生态运营几个角色。形成洋葱模型,层层递进。对于四个角色层级的定义如下: + +- 使用者:使用过某一款或某几款开源产品的用户 +- 参与者:与开源社区有互动行为的用户(例如与开源社区交流沟通、参与开源社区组织的活动等) +- 贡献者:对开源社区有实质性的贡献的用户(包括代码贡献和非代码贡献) +- 维护者:主要负责对开源社区日常运营的用户(包含项目 maintainer、PMC 成员等) + +另外,生态运营为主要负责对开源社区日常运营的用户, 层级在参与者之上,与维护者统称为运营者。本问卷除了对所有受访者提出基础问题以外,还针对使用者、贡献者、运营者几个角色分别进行了提问。 + +本次调查问卷的**基本信息**如下: + +- **调查对象** :覆盖开发者、社区成员、贡献者、学生、政府企业管理人员 +- **调查内容** :主要涵盖个人信息、工作状况、开源社区以及开发者技术等 +- **调查方法** :以在线问卷方式搜集样本和数据,交叉对比法分析数据 +- **分发渠道** :开源社、KubeCon + CloudNativeCon + Open Source Summit China、2023 第八届中国开源年会、2023 开放原子开发者大会、2023 开源产业生态大会 +- **问题类型** :单选、多选、开放性 +- **问题数量** :43 +- **样本量** :875 + +## 二. 问卷结果预览 + +**受访者特征** + +- 受访者年龄分布均匀,受教育程度普遍在本科以上,性别、地区分布符合中国开发者地理分布;涵盖了多种计算机行业职业身份。 + +**开源参与情况** + +- **开源社区的活跃情况**是受访者尤其关注的方面;**人工智能**成为了大多数受访者关注的技术领域。 + +**开源贡献情况** + +- 受访的开源社区贡献者更多在**技术基础类型**的仓库贡献;受访者贡献开源社区的动机多为**社区 / 荣誉激励**,对物质激励的要求较低。 + +**社区运营调查** + +- 大多数受访运营者所在开源社区**有专人负责社区运营**。近半数受访者所在企业注重**开源软件的使用规范和管理**。 + +**国内开源发展调查** + +- 受访者对国内开源未来发展持**乐观态度**。关于人工智能在开源生态的发展态势,开发者普遍看好其在**提高效率、自动化测试和数据分析**方面的应用前景,认为**数据安全、透明度、伦理问题**等是主要面临挑战。 + +## 三. 问卷分析 + +### 3.1. 
受访者特征 + +首先,我们从年龄、性别、学历、常驻城市、所处行业、职业身份等角度进行调查,通过这些问题可以了解参与者的基本信息,从而分析开源社区的受众群体特征。 + +#### 3.1.1 年龄、性别、学历、城市 + +| 年龄 | 性别 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + + +本次问卷受访者年龄分布与往年类似,主要集中在 21-50 岁年龄段,年龄分布较为均衡。值得一提的是,21岁以下的受访者比例为25.71%,较去年的8.42%有显著提升。表明年轻受访者的参与度大幅增加。 + +在性别方面,男性受访者占比较高,达到73.37%,女性占比为25.83%。与去年的问卷相比,受访者男女比例显著提高,符合当前开发者男女比例不协调的现状。 + +| 学历 | 地区 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + + +受访者受教育程度普遍在本科及以上;在城市分布上,来自江苏、四川、上海的受访者占大多数,有一部分原因是我们线下收集问卷的渠道在这些城市。另外北京市、广东省等地也有较多的受访者,整体分布和数据篇中的开发者分布较为一致。 + +#### 3.1.2 所处行业、职业 + +| 行业 | 职业身份 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +受访者主要分布在互联网 / IT / 电子 / 通信行业,占比达 72.23%,表明调查对象主要涵盖了科技领域。 + +在职业身份方面,最多的是在校学生,占 43.20%,其次是后端开发者、架构师和学术研究员。整体而言,调查对象以技术从业人员和学生居多,且涵盖了多个计算机行业职业身份。 + +### 3.2 开源参与情况 + +#### 3.2.1 开源社区的参与程度 + +| 开源社区的角色 | 接触开源的时长 | +|:------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------:| +| | | + +调查显示,开源社区中绝大多数成员为使用者(73.37%),同时有近半数参与者(49.03%)和少部分贡献者(26.51%)。 + +接触开源的时长方面,三分之一的受访者在开源社区中的接触时长不足 1 年,接近半数的人已经有 3 年以上的经验。 + +我们针对 "您认为自己多大程度上是开源社区一份子" 这个问题,与受访者在开源社区的角色进行了交叉分析。 + +| 认为自己多大程度上是开源社区一份子 | +|:----------------------------------------------------------------:| +| | + +可以看出,开源社区中,维护者、贡献者、生态运营相比参与者、使用者更有归属感。 + +接下来的问题针对在开源社区的角色为“使用者”及以上层级的受访者提出。 + +#### 3.2.2 开源产品使用情况 + +| 选择开源产品的原因 | 影响选择的因素 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + 
+使用者选择使用开源软件的主要原因是产品免费,其次是可以进行二次开发和社区氛围良好。 + +在选择开源产品时,参与者更注重代码规范程度、开发者活跃度。这表明用户不仅关注开源产品的功能和质量,还关注社区和开发者的活跃度以及项目的可持续性。 + +| 使用开源产品遇到的问题 | 促使进行开源贡献的因素 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +在遇到的问题方面,最常见的是项目缺少文档,其次是不稳定的版本更新。 + +个人兴趣、社区氛围以及提升技术能力等因素在推动开源贡献方面起到了重要作用。 + +#### 3.2.3 技术方向 + +| 感兴趣的技术方向 | 了解的开源许可证 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +受访者对人工智能表现出强烈兴趣,占 67.43%,其次是开发工具以及容器化和云计算。 + +关于开源许可证,Apache 是最受欢迎的选择,其次是 MIT 和 GPL。 + +#### 3.2.4 信息交流 + +| 检索开源产品的途径 | 与社区的沟通方式 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +在检索开源产品时,大多数人通过代码托管平台搜索,通过技术社区或媒体推荐,以及借助搜索引擎搜索,也是许多人的选择。 + +与开源社区的沟通方式主要为国内通讯工具(如钉钉、微信、QQ、飞书等)和异步沟通工具(如 GitHub Issue、Discussion、Mail List 等),而国际化通讯工具(如 Slack、Skype、Telegram、Lark 等)也被广泛采用。说明国际开源社区多以异步沟通工具为主,与国内有着明显的差异。 + +| 常用的产品 / 技术社区 | 获取开源信息的媒体 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +受访者主要通过代码托管平台进行开源社区的参与。此外,还有一大部分受访者通过国内技术论坛参与开源社区。 + +在获取开源信息方面,视频平台和问答网站是主要选择,反映了开发者倾向于通过视听和互动问答等方式获取开源知识。 + +### 3.3 开源贡献情况 + +此部分的问题针对在开源社区角色为“贡献者”及以上层级的受访者提出。 + +#### 3.3.1 开源贡献参与程度 + +| 是否参与开源项目活动 | 每周参与开源的时长 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +有三分之一的学生开发者参加过谷歌开源之夏(GSoC)、开源供应链点亮计划(OSPP)等开源项目活动;超过半数的贡献者每周参与开源的时长大于 5 小时,此外,有超过 10% 的贡献者每周参与开源的时长达 35 小时,已经接近全职开发者的标准。 + +#### 3.3.2 开源贡献方式 + +| 主要开源贡献平台 | 开源贡献常用开发语言 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +GitHub 仍是最受受访者青睐的平台,占据主导地位,其次是 Gitee 和 GitLab。表明在国内开发者中,GitHub 
的影响力仍然很大,但国内平台也在逐渐崭露头角。 主要使用的开发语言包括 Python、Java、C、JavaScript、Go。此外,HTML/CSS、TypeScript 等也获得了较高的选择次数。 + +#### 3.3.3 开源贡献内容 + +| 主要贡献类型 | 贡献的项目类型 | +|:--------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +受访者主要通过编写代码和文档来为开源项目做出贡献。此外,开源布道、开源社区运营和协助社区活动举办也是常见的贡献方式。 + +贡献的开源项目类型主要集中在库 / 中间件和通用框架 / 基础设施,体现了开发者对技术基础的深入关注。 + +#### 3.3.4 激励机制 + +| 激励方式 | 财务回报的来源 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +各激励方式都受到了积极评价,表明多元化的激励机制对开发者的开源参与产生了积极的影响。具体而言,受访者认为荣誉激励与社交激励对贡献有更显著的正向影响。 + +超过半数开发者参与开源项目没有财务回报。其余开发者通过薪酬 / 工资、悬赏 / 奖励直接获得财务回报,仅有极少数开发者通过广告收入、捐赠和专利 / 知识产权收益等途径获得财务支持。 + +### 3.4 社区运营调查 + +此部分的问题针对在开源社区角色为“运营者”的受访者提出。 + +#### 3.4.1 所在开源社区概况 + +| 社区用户数量 | 活跃开发者 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +近 6 成运营者所在开源社区用户数量小于 200 人,近 3 成大于 500 人。半数以上运营者所在开源社区活跃开发者数量小于 20 人。 + +#### 3.4.2 开源社区管理 + +| 社区管理情况 | 社区商业公司支持 | +|:-------------------------------------------------------------------------:|:-------------------------------------------------------------------------:| +| | | + +大约一半的社区有清晰的治理结构和专人负责日常运营。同时,社区普遍制定了明确的规范和提供更新的文档,以支持成员融入。 + +大多数开源社区有商业公司支持,且主要采用声明采用、协同开发的形式。 + +#### 3.4.3 开源软件商业化调研 + +| 所在企业开源软件使用情况 | 是否认同将开源项目商业化 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +绝大多数企业曾使用开源软件,其中有明确的使用要求和管理规范与缺乏相应管理规范的样本达 5:6。说明一部分企业采用开源软件时注重规范和管理,但仍有一大部分企业在管理规范上较为松散,这可能受到企业规模、行业差异以及对开源软件理解程度等因素的影响。 + +将开源项目用于商业化的认可程度平均为 3.65,其中 31.66% 的人给出了最高认可评分,表明大多数受访者对此持中等至较高的认可态度。 + +### 3.5 开源发展调研 + +#### 3.5.1 开源发展态势 + +| 开源社区的发展态势 | +|:----------------------------------------------------------------:| +| | + +总体而言,受访者普遍看好国内开源的未来发展,认为在各个方面都呈现出积极的态势。 + +| 开源项目持续发展的特征 | 评价开源项目的指标 | 
+|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +受访者认为影响一个开源社区健康持续发展的最重要特质主要为快速的社区响应速度、有持续涌入的新贡献者,以及新贡献者能够被转化为长期贡献者。这说明长期可持续性对于社区的成功发展至关重要。 + +受访者在评价开源项目时主要关注项目影响力、权威性、社区的活跃程度以及是否有持续的更新和维护。这反映了开发者对于项目在技术和社区层面的整体健康状况的关切。 + +#### 3.5.2 人工智能对开发者及开源生态的影响与挑战 + +| AI 对开发者的影响 | AI 未来在开源社区的角色 | +|:----------------------------------------------------------------:|:----------------------------------------------------------------:| +| | | + +调查结果表明,开发者对人工智能技术在开源项目中的影响较为乐观,特别是在提高效率、自动化测试、数据分析以及项目安全方面有较大应用前景。 + +| 人工智能在开源生态面临的挑战 | +|:----------------------------------------------------------------:| +| | + +此外,隐私和数据安全问题、透明度、伦理问题被视为人工智能技术在开源生态中面临的主要挑战,表明在 AI 技术应用中需要平衡技术难题和社会考量。 + +:::info 专家点评 +**余杰**:面对 AI 浪潮的席卷,我们应该保持冷静和自信,以积极的心态去拥抱它、学习它,充分利用 AI 技术,推动个人和项目的持续发展。 +::: diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000..75e150c --- /dev/null +++ b/yarn.lock @@ -0,0 +1,747 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
+# yarn lockfile v1 + + +"@algolia/autocomplete-core@1.9.3": + version "1.9.3" + resolved "https://registry.yarnpkg.com/@algolia/autocomplete-core/-/autocomplete-core-1.9.3.tgz#1d56482a768c33aae0868c8533049e02e8961be7" + integrity sha512-009HdfugtGCdC4JdXUbVJClA0q0zh24yyePn+KUGk3rP7j8FEe/m5Yo/z65gn6nP/cM39PxpzqKrL7A6fP6PPw== + dependencies: + "@algolia/autocomplete-plugin-algolia-insights" "1.9.3" + "@algolia/autocomplete-shared" "1.9.3" + +"@algolia/autocomplete-plugin-algolia-insights@1.9.3": + version "1.9.3" + resolved "https://registry.yarnpkg.com/@algolia/autocomplete-plugin-algolia-insights/-/autocomplete-plugin-algolia-insights-1.9.3.tgz#9b7f8641052c8ead6d66c1623d444cbe19dde587" + integrity sha512-a/yTUkcO/Vyy+JffmAnTWbr4/90cLzw+CC3bRbhnULr/EM0fGNvM13oQQ14f2moLMcVDyAx/leczLlAOovhSZg== + dependencies: + "@algolia/autocomplete-shared" "1.9.3" + +"@algolia/autocomplete-preset-algolia@1.9.3": + version "1.9.3" + resolved "https://registry.yarnpkg.com/@algolia/autocomplete-preset-algolia/-/autocomplete-preset-algolia-1.9.3.tgz#64cca4a4304cfcad2cf730e83067e0c1b2f485da" + integrity sha512-d4qlt6YmrLMYy95n5TB52wtNDr6EgAIPH81dvvvW8UmuWRgxEtY0NJiPwl/h95JtG2vmRM804M0DSwMCNZlzRA== + dependencies: + "@algolia/autocomplete-shared" "1.9.3" + +"@algolia/autocomplete-shared@1.9.3": + version "1.9.3" + resolved "https://registry.yarnpkg.com/@algolia/autocomplete-shared/-/autocomplete-shared-1.9.3.tgz#2e22e830d36f0a9cf2c0ccd3c7f6d59435b77dfa" + integrity sha512-Wnm9E4Ye6Rl6sTTqjoymD+l8DjSTHsHboVRYrKgEt8Q7UHm9nYbqhN/i0fhUYA3OAEH7WA8x3jfpnmJm3rKvaQ== + +"@algolia/cache-browser-local-storage@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/cache-browser-local-storage/-/cache-browser-local-storage-4.23.1.tgz#1e33f6e48a16ddd2568903eaf3866e541c28cc64" + integrity sha512-1sAJYmXN9TOk6Hd8BUQOglxP9Kq9F0qlISsuWxH6y4UjevevgxhLvA6VrODJb1ghwwQi0nODXxwUhNh0sGF8xw== + dependencies: + "@algolia/cache-common" "4.23.1" + +"@algolia/cache-common@4.23.1": + version 
"4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/cache-common/-/cache-common-4.23.1.tgz#5f066ae1a0aeb4df94ec1fdb7c0b39b1099e9953" + integrity sha512-w0sqXuwbGyIDsFDHTZzTv79rZjW7vc/6vCPdqYAAkiUlvvCdUo0cCWFXpbMpvYHBS2IXZXJaQY0R9yL/bmk9VQ== + +"@algolia/cache-in-memory@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/cache-in-memory/-/cache-in-memory-4.23.1.tgz#cc402d0f44b2948b97af05d107b0185f453bb0ff" + integrity sha512-Wy5J4c2vLi1Vfsc3qoM/trVtvN9BlV+X2hfiAhfTVclyney6fs/Rjus8lbadl1x5GjlPIgMNGxvr/A/wnJQUBw== + dependencies: + "@algolia/cache-common" "4.23.1" + +"@algolia/client-account@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/client-account/-/client-account-4.23.1.tgz#179c8905a6cb35c2eb727478a9d7495c75c3c6fa" + integrity sha512-E8rALAfC7G1gruxW4zO3WgBkWQDJq1Crnxi45uo/KUYf78x+T7YwojyS42fHz//thbtPyPUC5WZCQlnzqgMZlg== + dependencies: + "@algolia/client-common" "4.23.1" + "@algolia/client-search" "4.23.1" + "@algolia/transporter" "4.23.1" + +"@algolia/client-analytics@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/client-analytics/-/client-analytics-4.23.1.tgz#a6523e71f91dcc63fbe033342555b931b2480609" + integrity sha512-xtfp/M3TjHStStH/UQoxmt8SeVpxSgdZGcCY61+chG9fmbJHgeYtzECQu7SVZ+LPTW0nmyqMrpKQ9kFcgPnV1A== + dependencies: + "@algolia/client-common" "4.23.1" + "@algolia/client-search" "4.23.1" + "@algolia/requester-common" "4.23.1" + "@algolia/transporter" "4.23.1" + +"@algolia/client-common@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/client-common/-/client-common-4.23.1.tgz#5ca47ecb35f2928fb1c94e7fcac54eb0f432425d" + integrity sha512-01lBsO8r4KeXWIDzVQoPMYwOndeAvSQk3xk3Bxwrt2ag5jrGswiq8DgEqPVx+PQw+7T5GY6dS25cYcdv1dVorA== + dependencies: + "@algolia/requester-common" "4.23.1" + "@algolia/transporter" "4.23.1" + +"@algolia/client-personalization@4.23.1": + version "4.23.1" + resolved 
"https://registry.yarnpkg.com/@algolia/client-personalization/-/client-personalization-4.23.1.tgz#60c88fcbca456890ce5725958b6c0de25cf62003" + integrity sha512-B8UEALAg1/6DaLuJOxYTfGBVrLZN4M7FoaYrjbHLw2oF5Y6bxe59Y5ug+lSbs6v9bWx7U9rNVpd8m2I8pobFcA== + dependencies: + "@algolia/client-common" "4.23.1" + "@algolia/requester-common" "4.23.1" + "@algolia/transporter" "4.23.1" + +"@algolia/client-search@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/client-search/-/client-search-4.23.1.tgz#6d0faa8eb2dc06a4c484a4db144fb1319eda0bd0" + integrity sha512-jeA1ZksO0N33SZhcLRa4paUI7LFJrrhtMlw27eIdPTVv/npV0dMLoNGPg3MuLSeZqRKqfpY7tTOBjRZFMhskLg== + dependencies: + "@algolia/client-common" "4.23.1" + "@algolia/requester-common" "4.23.1" + "@algolia/transporter" "4.23.1" + +"@algolia/logger-common@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/logger-common/-/logger-common-4.23.1.tgz#2e836adfbd3224b5f74d4dea1e25490965a00b06" + integrity sha512-hGsqJrpeZfw1Ng8ctWj9gg8zXlSmEMA0cfbBn3yoZa3so8oQZmB9uz57AJcJj1CfSBf+5SK8/AF4kjTungvgUA== + +"@algolia/logger-console@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/logger-console/-/logger-console-4.23.1.tgz#6e3c02669c48f92bcb886d6e55215c0bd41946e6" + integrity sha512-6QYjtxsDwrdFeLoCcZmi9af/EbWkpUYSclx2d342EoayaY8S2tCORgqwzDmPPOpvi5Y6zPnDsj2BG7vrpK8bdg== + dependencies: + "@algolia/logger-common" "4.23.1" + +"@algolia/recommend@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/recommend/-/recommend-4.23.1.tgz#6b106bf0c015cce7ba2160d34b3777f30f13230f" + integrity sha512-9Td+htxUYkUxrkaPOum9Q1jAy+NogxpwZ+Vvn3X+IBIfXECrNhIff+u1ddIirRM2rMphWrrO/3GWLZaKY7FOxw== + dependencies: + "@algolia/cache-browser-local-storage" "4.23.1" + "@algolia/cache-common" "4.23.1" + "@algolia/cache-in-memory" "4.23.1" + "@algolia/client-common" "4.23.1" + "@algolia/client-search" "4.23.1" + "@algolia/logger-common" "4.23.1" + "@algolia/logger-console" 
"4.23.1" + "@algolia/requester-browser-xhr" "4.23.1" + "@algolia/requester-common" "4.23.1" + "@algolia/requester-node-http" "4.23.1" + "@algolia/transporter" "4.23.1" + +"@algolia/requester-browser-xhr@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/requester-browser-xhr/-/requester-browser-xhr-4.23.1.tgz#edd03228b756dedfdb570db446296ca68b8e4c21" + integrity sha512-dM8acMp6sn1HRoQrUiBYQCZvTrFwLwFuHagZH88nKhL52bUrtZXH2qUQ8RMQBb9am71J9COLyMgZYdyR+u8Ktg== + dependencies: + "@algolia/requester-common" "4.23.1" + +"@algolia/requester-common@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/requester-common/-/requester-common-4.23.1.tgz#7180596c86f69e6a258eb5f4d955f9f3a5806191" + integrity sha512-G9+ySLxPBtn2o6Mk4NoxPnkYtAe/isxrVy5LmJ4za+aYEdV5tvZpgvn+k4558T7XoRBrI2eQKyjnvQs7zJeCdw== + +"@algolia/requester-node-http@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/requester-node-http/-/requester-node-http-4.23.1.tgz#de66e1f70c1513dd65d954522a1526ad30187139" + integrity sha512-prpVKKc/WRAtZqZx3A6Ds+D6bl3jgnY4Nw8BhCO9yzwMu5oXaOFsZrzFRBeVUJbtOWMc/DMP38vY6DpdV6NSfw== + dependencies: + "@algolia/requester-common" "4.23.1" + +"@algolia/transporter@4.23.1": + version "4.23.1" + resolved "https://registry.yarnpkg.com/@algolia/transporter/-/transporter-4.23.1.tgz#e3990f8840211b45bab4fa4ffd79e6b6b776e36f" + integrity sha512-8ucVx0hV7yIeTZUFsix31UEIJFRauPriWlzLBbDy9gRHrK45WbMQ1S9FliDdoY5OvbFxi0/5OKRj0Dw1EkbcJA== + dependencies: + "@algolia/cache-common" "4.23.1" + "@algolia/logger-common" "4.23.1" + "@algolia/requester-common" "4.23.1" + +"@babel/parser@^7.23.9": + version "7.24.1" + resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.24.1.tgz#1e416d3627393fab1cb5b0f2f1796a100ae9133a" + integrity sha512-Zo9c7N3xdOIQrNip7Lc9wvRPzlRtovHVE4lkz8WEDr7uYh/GMQhSiIgFxGIArRHYdJE5kxtZjAf8rT0xhdLCzg== + +"@docsearch/css@3.6.0", "@docsearch/css@^3.5.2": + version "3.6.0" + resolved 
"https://registry.yarnpkg.com/@docsearch/css/-/css-3.6.0.tgz#0e9f56f704b3a34d044d15fd9962ebc1536ba4fb" + integrity sha512-+sbxb71sWre+PwDK7X2T8+bhS6clcVMLwBPznX45Qu6opJcgRjAp7gYSDzVFp187J+feSj5dNBN1mJoi6ckkUQ== + +"@docsearch/js@^3.5.2": + version "3.6.0" + resolved "https://registry.yarnpkg.com/@docsearch/js/-/js-3.6.0.tgz#f9e46943449b9092d874944f7a80bcc071004cfb" + integrity sha512-QujhqINEElrkIfKwyyyTfbsfMAYCkylInLYMRqHy7PHc8xTBQCow73tlo/Kc7oIwBrCLf0P3YhjlOeV4v8hevQ== + dependencies: + "@docsearch/react" "3.6.0" + preact "^10.0.0" + +"@docsearch/react@3.6.0": + version "3.6.0" + resolved "https://registry.yarnpkg.com/@docsearch/react/-/react-3.6.0.tgz#b4f25228ecb7fc473741aefac592121e86dd2958" + integrity sha512-HUFut4ztcVNmqy9gp/wxNbC7pTOHhgVVkHVGCACTuLhUKUhKAF9KYHJtMiLUJxEqiFLQiuri1fWF8zqwM/cu1w== + dependencies: + "@algolia/autocomplete-core" "1.9.3" + "@algolia/autocomplete-preset-algolia" "1.9.3" + "@docsearch/css" "3.6.0" + algoliasearch "^4.19.1" + +"@esbuild/aix-ppc64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.20.2.tgz#a70f4ac11c6a1dfc18b8bbb13284155d933b9537" + integrity sha512-D+EBOJHXdNZcLJRBkhENNG8Wji2kgc9AZ9KiPr1JuZjsNtyHzrsfLRrY0tk2H2aoFu6RANO1y1iPPUCDYWkb5g== + +"@esbuild/android-arm64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.20.2.tgz#db1c9202a5bc92ea04c7b6840f1bbe09ebf9e6b9" + integrity sha512-mRzjLacRtl/tWU0SvD8lUEwb61yP9cqQo6noDZP/O8VkwafSYwZ4yWy24kan8jE/IMERpYncRt2dw438LP3Xmg== + +"@esbuild/android-arm@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.20.2.tgz#3b488c49aee9d491c2c8f98a909b785870d6e995" + integrity sha512-t98Ra6pw2VaDhqNWO2Oph2LXbz/EJcnLmKLGBJwEwXX/JAN83Fym1rU8l0JUWK6HkIbWONCSSatf4sf2NBRx/w== + +"@esbuild/android-x64@0.20.2": + version "0.20.2" + resolved 
"https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.20.2.tgz#3b1628029e5576249d2b2d766696e50768449f98" + integrity sha512-btzExgV+/lMGDDa194CcUQm53ncxzeBrWJcncOBxuC6ndBkKxnHdFJn86mCIgTELsooUmwUm9FkhSp5HYu00Rg== + +"@esbuild/darwin-arm64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.20.2.tgz#6e8517a045ddd86ae30c6608c8475ebc0c4000bb" + integrity sha512-4J6IRT+10J3aJH3l1yzEg9y3wkTDgDk7TSDFX+wKFiWjqWp/iCfLIYzGyasx9l0SAFPT1HwSCR+0w/h1ES/MjA== + +"@esbuild/darwin-x64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.20.2.tgz#90ed098e1f9dd8a9381695b207e1cff45540a0d0" + integrity sha512-tBcXp9KNphnNH0dfhv8KYkZhjc+H3XBkF5DKtswJblV7KlT9EI2+jeA8DgBjp908WEuYll6pF+UStUCfEpdysA== + +"@esbuild/freebsd-arm64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.20.2.tgz#d71502d1ee89a1130327e890364666c760a2a911" + integrity sha512-d3qI41G4SuLiCGCFGUrKsSeTXyWG6yem1KcGZVS+3FYlYhtNoNgYrWcvkOoaqMhwXSMrZRl69ArHsGJ9mYdbbw== + +"@esbuild/freebsd-x64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.20.2.tgz#aa5ea58d9c1dd9af688b8b6f63ef0d3d60cea53c" + integrity sha512-d+DipyvHRuqEeM5zDivKV1KuXn9WeRX6vqSqIDgwIfPQtwMP4jaDsQsDncjTDDsExT4lR/91OLjRo8bmC1e+Cw== + +"@esbuild/linux-arm64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.20.2.tgz#055b63725df678379b0f6db9d0fa85463755b2e5" + integrity sha512-9pb6rBjGvTFNira2FLIWqDk/uaf42sSyLE8j1rnUpuzsODBq7FvpwHYZxQ/It/8b+QOS1RYfqgGFNLRI+qlq2A== + +"@esbuild/linux-arm@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.20.2.tgz#76b3b98cb1f87936fbc37f073efabad49dcd889c" + integrity sha512-VhLPeR8HTMPccbuWWcEUD1Az68TqaTYyj6nfE4QByZIQEQVWBB8vup8PpR7y1QHL3CpcF6xd5WVBU/+SBEvGTg== + +"@esbuild/linux-ia32@0.20.2": 
+ version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.20.2.tgz#c0e5e787c285264e5dfc7a79f04b8b4eefdad7fa" + integrity sha512-o10utieEkNPFDZFQm9CoP7Tvb33UutoJqg3qKf1PWVeeJhJw0Q347PxMvBgVVFgouYLGIhFYG0UGdBumROyiig== + +"@esbuild/linux-loong64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.20.2.tgz#a6184e62bd7cdc63e0c0448b83801001653219c5" + integrity sha512-PR7sp6R/UC4CFVomVINKJ80pMFlfDfMQMYynX7t1tNTeivQ6XdX5r2XovMmha/VjR1YN/HgHWsVcTRIMkymrgQ== + +"@esbuild/linux-mips64el@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.20.2.tgz#d08e39ce86f45ef8fc88549d29c62b8acf5649aa" + integrity sha512-4BlTqeutE/KnOiTG5Y6Sb/Hw6hsBOZapOVF6njAESHInhlQAghVVZL1ZpIctBOoTFbQyGW+LsVYZ8lSSB3wkjA== + +"@esbuild/linux-ppc64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.20.2.tgz#8d252f0b7756ffd6d1cbde5ea67ff8fd20437f20" + integrity sha512-rD3KsaDprDcfajSKdn25ooz5J5/fWBylaaXkuotBDGnMnDP1Uv5DLAN/45qfnf3JDYyJv/ytGHQaziHUdyzaAg== + +"@esbuild/linux-riscv64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.20.2.tgz#19f6dcdb14409dae607f66ca1181dd4e9db81300" + integrity sha512-snwmBKacKmwTMmhLlz/3aH1Q9T8v45bKYGE3j26TsaOVtjIag4wLfWSiZykXzXuE1kbCE+zJRmwp+ZbIHinnVg== + +"@esbuild/linux-s390x@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.20.2.tgz#3c830c90f1a5d7dd1473d5595ea4ebb920988685" + integrity sha512-wcWISOobRWNm3cezm5HOZcYz1sKoHLd8VL1dl309DiixxVFoFe/o8HnwuIwn6sXre88Nwj+VwZUvJf4AFxkyrQ== + +"@esbuild/linux-x64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.20.2.tgz#86eca35203afc0d9de0694c64ec0ab0a378f6fff" + integrity 
sha512-1MdwI6OOTsfQfek8sLwgyjOXAu+wKhLEoaOLTjbijk6E2WONYpH9ZU2mNtR+lZ2B4uwr+usqGuVfFT9tMtGvGw== + +"@esbuild/netbsd-x64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.20.2.tgz#e771c8eb0e0f6e1877ffd4220036b98aed5915e6" + integrity sha512-K8/DhBxcVQkzYc43yJXDSyjlFeHQJBiowJ0uVL6Tor3jGQfSGHNNJcWxNbOI8v5k82prYqzPuwkzHt3J1T1iZQ== + +"@esbuild/openbsd-x64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.20.2.tgz#9a795ae4b4e37e674f0f4d716f3e226dd7c39baf" + integrity sha512-eMpKlV0SThJmmJgiVyN9jTPJ2VBPquf6Kt/nAoo6DgHAoN57K15ZghiHaMvqjCye/uU4X5u3YSMgVBI1h3vKrQ== + +"@esbuild/sunos-x64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.20.2.tgz#7df23b61a497b8ac189def6e25a95673caedb03f" + integrity sha512-2UyFtRC6cXLyejf/YEld4Hajo7UHILetzE1vsRcGL3earZEW77JxrFjH4Ez2qaTiEfMgAXxfAZCm1fvM/G/o8w== + +"@esbuild/win32-arm64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.20.2.tgz#f1ae5abf9ca052ae11c1bc806fb4c0f519bacf90" + integrity sha512-GRibxoawM9ZCnDxnP3usoUDO9vUkpAxIIZ6GQI+IlVmr5kP3zUq+l17xELTHMWTWzjxa2guPNyrpq1GWmPvcGQ== + +"@esbuild/win32-ia32@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.20.2.tgz#241fe62c34d8e8461cd708277813e1d0ba55ce23" + integrity sha512-HfLOfn9YWmkSKRQqovpnITazdtquEW8/SoHW7pWpuEeguaZI4QnCRW6b+oZTztdBnZOS2hqJ6im/D5cPzBTTlQ== + +"@esbuild/win32-x64@0.20.2": + version "0.20.2" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.20.2.tgz#9c907b21e30a52db959ba4f80bb01a0cc403d5cc" + integrity sha512-N49X4lJX27+l9jbLKSqZ6bKNjzQvHaT8IIFUy+YIqmXQdjYCToGWwOItDrfby14c78aDd5NHQl29xingXfCdLQ== + +"@jridgewell/sourcemap-codec@^1.4.15": + version "1.4.15" + resolved 
"https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz#d7c6e6755c78567a951e04ab52ef0fd26de59f32" + integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg== + +"@rollup/rollup-android-arm-eabi@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.13.1.tgz#88ba199f996e0000689130ed69e47df8b0dfbc70" + integrity sha512-4C4UERETjXpC4WpBXDbkgNVgHyWfG3B/NKY46e7w5H134UDOFqUJKpsLm0UYmuupW+aJmRgeScrDNfvZ5WV80A== + +"@rollup/rollup-android-arm64@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.13.1.tgz#c89a55670e1179ed7ba3db06cee0d7da7b3d35ce" + integrity sha512-TrTaFJ9pXgfXEiJKQ3yQRelpQFqgRzVR9it8DbeRzG0RX7mKUy0bqhCFsgevwXLJepQKTnLl95TnPGf9T9AMOA== + +"@rollup/rollup-darwin-arm64@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.13.1.tgz#52e3496fa66d761833df23a9b4860e517efc7d1d" + integrity sha512-fz7jN6ahTI3cKzDO2otQuybts5cyu0feymg0bjvYCBrZQ8tSgE8pc0sSNEuGvifrQJWiwx9F05BowihmLxeQKw== + +"@rollup/rollup-darwin-x64@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.13.1.tgz#7678922773a8b53d8b4b3c3cc3e77b65fc71b489" + integrity sha512-WTvdz7SLMlJpektdrnWRUN9C0N2qNHwNbWpNo0a3Tod3gb9leX+yrYdCeB7VV36OtoyiPAivl7/xZ3G1z5h20g== + +"@rollup/rollup-linux-arm-gnueabihf@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.13.1.tgz#69c3b896e3ee1c3487492323a02c2a3ae0d4b2e7" + integrity sha512-dBHQl+7wZzBYcIF6o4k2XkAfwP2ks1mYW2q/Gzv9n39uDcDiAGDqEyml08OdY0BIct0yLSPkDTqn4i6czpBLLw== + +"@rollup/rollup-linux-arm64-gnu@4.13.1": + version "4.13.1" + resolved 
"https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.13.1.tgz#13353f0ab65f4add0241f97f7ccc640b3a2b5cf2" + integrity sha512-bur4JOxvYxfrAmocRJIW0SADs3QdEYK6TQ7dTNz6Z4/lySeu3Z1H/+tl0a4qDYv0bCdBpUYM0sYa/X+9ZqgfSQ== + +"@rollup/rollup-linux-arm64-musl@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.13.1.tgz#bf64eaa29b2b1e6bc9195f04bb30b2a4ffdc25ae" + integrity sha512-ssp77SjcDIUSoUyj7DU7/5iwM4ZEluY+N8umtCT9nBRs3u045t0KkW02LTyHouHDomnMXaXSZcCSr2bdMK63kA== + +"@rollup/rollup-linux-riscv64-gnu@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.13.1.tgz#ec05966a4ed1b3338c8842108353ac6d3443dc6a" + integrity sha512-Jv1DkIvwEPAb+v25/Unrnnq9BO3F5cbFPT821n3S5litkz+O5NuXuNhqtPx5KtcwOTtaqkTsO+IVzJOsxd11aQ== + +"@rollup/rollup-linux-s390x-gnu@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.13.1.tgz#c10a1f1522f0c9191ee45f677bd08763ddfdc039" + integrity sha512-U564BrhEfaNChdATQaEODtquCC7Ez+8Hxz1h5MAdMYj0AqD0GA9rHCpElajb/sQcaFL6NXmHc5O+7FXpWMa73Q== + +"@rollup/rollup-linux-x64-gnu@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.13.1.tgz#836f948b6efc53f05f57d1d9ba92e90d629b3f22" + integrity sha512-zGRDulLTeDemR8DFYyFIQ8kMP02xpUsX4IBikc7lwL9PrwR3gWmX2NopqiGlI2ZVWMl15qZeUjumTwpv18N7sQ== + +"@rollup/rollup-linux-x64-musl@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.13.1.tgz#07e0351cc18eeef026f903189d8312833cb6bd1f" + integrity sha512-VTk/MveyPdMFkYJJPCkYBw07KcTkGU2hLEyqYMsU4NjiOfzoaDTW9PWGRsNwiOA3qI0k/JQPjkl/4FCK1smskQ== + +"@rollup/rollup-win32-arm64-msvc@4.13.1": + version "4.13.1" + resolved 
"https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.13.1.tgz#6f9359bbec6cb4a2c002642c63e3704b0b5e68b7" + integrity sha512-L+hX8Dtibb02r/OYCsp4sQQIi3ldZkFI0EUkMTDwRfFykXBPptoz/tuuGqEd3bThBSLRWPR6wsixDSgOx/U3Zw== + +"@rollup/rollup-win32-ia32-msvc@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.13.1.tgz#d6545a29ac9dd8b39a9161b87924f13471eb992e" + integrity sha512-+dI2jVPfM5A8zme8riEoNC7UKk0Lzc7jCj/U89cQIrOjrZTCWZl/+IXUeRT2rEZ5j25lnSA9G9H1Ob9azaF/KQ== + +"@rollup/rollup-win32-x64-msvc@4.13.1": + version "4.13.1" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.13.1.tgz#d1b221daca9afca1885b91a311c6f4a04b0deeb5" + integrity sha512-YY1Exxo2viZ/O2dMHuwQvimJ0SqvL+OAWQLLY6rvXavgQKjhQUzn7nc1Dd29gjB5Fqi00nrBWctJBOyfVMIVxw== + +"@types/estree@1.0.5": + version "1.0.5" + resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.5.tgz#a6ce3e556e00fd9895dd872dd172ad0d4bd687f4" + integrity sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw== + +"@types/linkify-it@*": + version "3.0.5" + resolved "https://registry.yarnpkg.com/@types/linkify-it/-/linkify-it-3.0.5.tgz#1e78a3ac2428e6d7e6c05c1665c242023a4601d8" + integrity sha512-yg6E+u0/+Zjva+buc3EIb+29XEg4wltq7cSmd4Uc2EE/1nUVmxyzpX6gUXD0V8jIrG0r7YeOGVIbYRkxeooCtw== + +"@types/markdown-it@^13.0.7": + version "13.0.7" + resolved "https://registry.yarnpkg.com/@types/markdown-it/-/markdown-it-13.0.7.tgz#4a495115f470075bd4434a0438ac477a49c2e152" + integrity sha512-U/CBi2YUUcTHBt5tjO2r5QV/x0Po6nsYwQU4Y04fBS6vfoImaiZ6f8bi3CjTCxBPQSO1LMyUqkByzi8AidyxfA== + dependencies: + "@types/linkify-it" "*" + "@types/mdurl" "*" + +"@types/mdurl@*": + version "1.0.5" + resolved "https://registry.yarnpkg.com/@types/mdurl/-/mdurl-1.0.5.tgz#3e0d2db570e9fb6ccb2dc8fde0be1d79ac810d39" + integrity 
sha512-6L6VymKTzYSrEf4Nev4Xa1LCHKrlTlYCBMTlQKFuddo1CvQcE52I0mwfOJayueUC7MJuXOeHTcIU683lzd0cUA== + +"@types/node@^20.11.20": + version "20.11.30" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.11.30.tgz#9c33467fc23167a347e73834f788f4b9f399d66f" + integrity sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw== + dependencies: + undici-types "~5.26.4" + +"@types/web-bluetooth@^0.0.20": + version "0.0.20" + resolved "https://registry.yarnpkg.com/@types/web-bluetooth/-/web-bluetooth-0.0.20.tgz#f066abfcd1cbe66267cdbbf0de010d8a41b41597" + integrity sha512-g9gZnnXVq7gM7v3tJCWV/qw7w+KeOlSHAhgF9RytFyifW6AF61hdT2ucrYhPq9hLs5JIryeupHV3qGk95dH9ow== + +"@vitejs/plugin-vue@^5.0.2": + version "5.0.4" + resolved "https://registry.yarnpkg.com/@vitejs/plugin-vue/-/plugin-vue-5.0.4.tgz#508d6a0f2440f86945835d903fcc0d95d1bb8a37" + integrity sha512-WS3hevEszI6CEVEx28F8RjTX97k3KsrcY6kvTg7+Whm5y3oYvcqzVeGCU3hxSAn4uY2CLCkeokkGKpoctccilQ== + +"@vue/compiler-core@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/compiler-core/-/compiler-core-3.4.21.tgz#868b7085378fc24e58c9aed14c8d62110a62be1a" + integrity sha512-MjXawxZf2SbZszLPYxaFCjxfibYrzr3eYbKxwpLR9EQN+oaziSu3qKVbwBERj1IFIB8OLUewxB5m/BFzi613og== + dependencies: + "@babel/parser" "^7.23.9" + "@vue/shared" "3.4.21" + entities "^4.5.0" + estree-walker "^2.0.2" + source-map-js "^1.0.2" + +"@vue/compiler-dom@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/compiler-dom/-/compiler-dom-3.4.21.tgz#0077c355e2008207283a5a87d510330d22546803" + integrity sha512-IZC6FKowtT1sl0CR5DpXSiEB5ayw75oT2bma1BEhV7RRR1+cfwLrxc2Z8Zq/RGFzJ8w5r9QtCOvTjQgdn0IKmA== + dependencies: + "@vue/compiler-core" "3.4.21" + "@vue/shared" "3.4.21" + +"@vue/compiler-sfc@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/compiler-sfc/-/compiler-sfc-3.4.21.tgz#4af920dc31ab99e1ff5d152b5fe0ad12181145b2" + integrity 
sha512-me7epoTxYlY+2CUM7hy9PCDdpMPfIwrOvAXud2Upk10g4YLv9UBW7kL798TvMeDhPthkZ0CONNrK2GoeI1ODiQ== + dependencies: + "@babel/parser" "^7.23.9" + "@vue/compiler-core" "3.4.21" + "@vue/compiler-dom" "3.4.21" + "@vue/compiler-ssr" "3.4.21" + "@vue/shared" "3.4.21" + estree-walker "^2.0.2" + magic-string "^0.30.7" + postcss "^8.4.35" + source-map-js "^1.0.2" + +"@vue/compiler-ssr@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/compiler-ssr/-/compiler-ssr-3.4.21.tgz#b84ae64fb9c265df21fc67f7624587673d324fef" + integrity sha512-M5+9nI2lPpAsgXOGQobnIueVqc9sisBFexh5yMIMRAPYLa7+5wEJs8iqOZc1WAa9WQbx9GR2twgznU8LTIiZ4Q== + dependencies: + "@vue/compiler-dom" "3.4.21" + "@vue/shared" "3.4.21" + +"@vue/devtools-api@^6.5.1": + version "6.6.1" + resolved "https://registry.yarnpkg.com/@vue/devtools-api/-/devtools-api-6.6.1.tgz#7c14346383751d9f6ad4bea0963245b30220ef83" + integrity sha512-LgPscpE3Vs0x96PzSSB4IGVSZXZBZHpfxs+ZA1d+VEPwHdOXowy/Y2CsvCAIFrf+ssVU1pD1jidj505EpUnfbA== + +"@vue/reactivity@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/reactivity/-/reactivity-3.4.21.tgz#affd3415115b8ebf4927c8d2a0d6a24bccfa9f02" + integrity sha512-UhenImdc0L0/4ahGCyEzc/pZNwVgcglGy9HVzJ1Bq2Mm9qXOpP8RyNTjookw/gOCUlXSEtuZ2fUg5nrHcoqJcw== + dependencies: + "@vue/shared" "3.4.21" + +"@vue/runtime-core@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/runtime-core/-/runtime-core-3.4.21.tgz#3749c3f024a64c4c27ecd75aea4ca35634db0062" + integrity sha512-pQthsuYzE1XcGZznTKn73G0s14eCJcjaLvp3/DKeYWoFacD9glJoqlNBxt3W2c5S40t6CCcpPf+jG01N3ULyrA== + dependencies: + "@vue/reactivity" "3.4.21" + "@vue/shared" "3.4.21" + +"@vue/runtime-dom@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/runtime-dom/-/runtime-dom-3.4.21.tgz#91f867ef64eff232cac45095ab28ebc93ac74588" + integrity sha512-gvf+C9cFpevsQxbkRBS1NpU8CqxKw0ebqMvLwcGQrNpx6gqRDodqKqA+A2VZZpQ9RpK2f9yfg8VbW/EpdFUOJw== + dependencies: + "@vue/runtime-core" "3.4.21" + 
"@vue/shared" "3.4.21" + csstype "^3.1.3" + +"@vue/server-renderer@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/server-renderer/-/server-renderer-3.4.21.tgz#150751579d26661ee3ed26a28604667fa4222a97" + integrity sha512-aV1gXyKSN6Rz+6kZ6kr5+Ll14YzmIbeuWe7ryJl5muJ4uwSwY/aStXTixx76TwkZFJLm1aAlA/HSWEJ4EyiMkg== + dependencies: + "@vue/compiler-ssr" "3.4.21" + "@vue/shared" "3.4.21" + +"@vue/shared@3.4.21": + version "3.4.21" + resolved "https://registry.yarnpkg.com/@vue/shared/-/shared-3.4.21.tgz#de526a9059d0a599f0b429af7037cd0c3ed7d5a1" + integrity sha512-PuJe7vDIi6VYSinuEbUIQgMIRZGgM8e4R+G+/dQTk0X1NEdvgvvgv7m+rfmDH1gZzyA1OjjoWskvHlfRNfQf3g== + +"@vueuse/core@10.9.0", "@vueuse/core@^10.7.1": + version "10.9.0" + resolved "https://registry.yarnpkg.com/@vueuse/core/-/core-10.9.0.tgz#7d779a95cf0189de176fee63cee4ba44b3c85d64" + integrity sha512-/1vjTol8SXnx6xewDEKfS0Ra//ncg4Hb0DaZiwKf7drgfMsKFExQ+FnnENcN6efPen+1kIzhLQoGSy0eDUVOMg== + dependencies: + "@types/web-bluetooth" "^0.0.20" + "@vueuse/metadata" "10.9.0" + "@vueuse/shared" "10.9.0" + vue-demi ">=0.14.7" + +"@vueuse/integrations@^10.7.1": + version "10.9.0" + resolved "https://registry.yarnpkg.com/@vueuse/integrations/-/integrations-10.9.0.tgz#2b1a9556215ad3c1f96d39cbfbef102cf6e0ec05" + integrity sha512-acK+A01AYdWSvL4BZmCoJAcyHJ6EqhmkQEXbQLwev1MY7NBnS+hcEMx/BzVoR9zKI+UqEPMD9u6PsyAuiTRT4Q== + dependencies: + "@vueuse/core" "10.9.0" + "@vueuse/shared" "10.9.0" + vue-demi ">=0.14.7" + +"@vueuse/metadata@10.9.0": + version "10.9.0" + resolved "https://registry.yarnpkg.com/@vueuse/metadata/-/metadata-10.9.0.tgz#769a1a9db65daac15cf98084cbf7819ed3758620" + integrity sha512-iddNbg3yZM0X7qFY2sAotomgdHK7YJ6sKUvQqbvwnf7TmaVPxS4EJydcNsVejNdS8iWCtDk+fYXr7E32nyTnGA== + +"@vueuse/shared@10.9.0": + version "10.9.0" + resolved "https://registry.yarnpkg.com/@vueuse/shared/-/shared-10.9.0.tgz#13af2a348de15d07b7be2fd0c7fc9853a69d8fe0" + integrity 
sha512-Uud2IWncmAfJvRaFYzv5OHDli+FbOzxiVEQdLCKQKLyhz94PIyFC3CHcH7EDMwIn8NPtD06+PNbC/PiO0LGLtw== + dependencies: + vue-demi ">=0.14.7" + +algoliasearch@^4.19.1: + version "4.23.1" + resolved "https://registry.yarnpkg.com/algoliasearch/-/algoliasearch-4.23.1.tgz#efdf5b5dddb0dd684b6751f0df09ce791f274b7f" + integrity sha512-LNK5n23zDXVf8kaLMZrVDEy4ecxIFUDEsQlx+He1l+TCmP8eiheLVMi5eyZlU6qmmq3UWCxZmp6hzCXS/hvXEw== + dependencies: + "@algolia/cache-browser-local-storage" "4.23.1" + "@algolia/cache-common" "4.23.1" + "@algolia/cache-in-memory" "4.23.1" + "@algolia/client-account" "4.23.1" + "@algolia/client-analytics" "4.23.1" + "@algolia/client-common" "4.23.1" + "@algolia/client-personalization" "4.23.1" + "@algolia/client-search" "4.23.1" + "@algolia/logger-common" "4.23.1" + "@algolia/logger-console" "4.23.1" + "@algolia/recommend" "4.23.1" + "@algolia/requester-browser-xhr" "4.23.1" + "@algolia/requester-common" "4.23.1" + "@algolia/requester-node-http" "4.23.1" + "@algolia/transporter" "4.23.1" + +csstype@^3.1.3: + version "3.1.3" + resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.1.3.tgz#d80ff294d114fb0e6ac500fbf85b60137d7eff81" + integrity sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw== + +entities@^4.5.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" + integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== + +esbuild@^0.20.1: + version "0.20.2" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.20.2.tgz#9d6b2386561766ee6b5a55196c6d766d28c87ea1" + integrity sha512-WdOOppmUNU+IbZ0PaDiTst80zjnrOkyJNHoKupIcVyU8Lvla3Ugx94VzkQ32Ijqd7UhHJy75gNWDMUekcrSJ6g== + optionalDependencies: + "@esbuild/aix-ppc64" "0.20.2" + "@esbuild/android-arm" "0.20.2" + "@esbuild/android-arm64" "0.20.2" + "@esbuild/android-x64" "0.20.2" + "@esbuild/darwin-arm64" "0.20.2" + "@esbuild/darwin-x64" 
"0.20.2" + "@esbuild/freebsd-arm64" "0.20.2" + "@esbuild/freebsd-x64" "0.20.2" + "@esbuild/linux-arm" "0.20.2" + "@esbuild/linux-arm64" "0.20.2" + "@esbuild/linux-ia32" "0.20.2" + "@esbuild/linux-loong64" "0.20.2" + "@esbuild/linux-mips64el" "0.20.2" + "@esbuild/linux-ppc64" "0.20.2" + "@esbuild/linux-riscv64" "0.20.2" + "@esbuild/linux-s390x" "0.20.2" + "@esbuild/linux-x64" "0.20.2" + "@esbuild/netbsd-x64" "0.20.2" + "@esbuild/openbsd-x64" "0.20.2" + "@esbuild/sunos-x64" "0.20.2" + "@esbuild/win32-arm64" "0.20.2" + "@esbuild/win32-ia32" "0.20.2" + "@esbuild/win32-x64" "0.20.2" + +estree-walker@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/estree-walker/-/estree-walker-2.0.2.tgz#52f010178c2a4c117a7757cfe942adb7d2da4cac" + integrity sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w== + +focus-trap@^7.5.4: + version "7.5.4" + resolved "https://registry.yarnpkg.com/focus-trap/-/focus-trap-7.5.4.tgz#6c4e342fe1dae6add9c2aa332a6e7a0bbd495ba2" + integrity sha512-N7kHdlgsO/v+iD/dMoJKtsSqs5Dz/dXZVebRgJw23LDk+jMi/974zyiOYDziY2JPp8xivq9BmUGwIJMiuSBi7w== + dependencies: + tabbable "^6.2.0" + +fsevents@~2.3.2, fsevents@~2.3.3: + version "2.3.3" + resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" + integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw== + +magic-string@^0.30.7: + version "0.30.8" + resolved "https://registry.yarnpkg.com/magic-string/-/magic-string-0.30.8.tgz#14e8624246d2bedba70d5462aa99ac9681844613" + integrity sha512-ISQTe55T2ao7XtlAStud6qwYPZjE4GK1S/BeVPus4jrq6JuOnQ00YKQC581RWhR122W7msZV263KzVeLoqidyQ== + dependencies: + "@jridgewell/sourcemap-codec" "^1.4.15" + +mark.js@8.11.1: + version "8.11.1" + resolved "https://registry.yarnpkg.com/mark.js/-/mark.js-8.11.1.tgz#180f1f9ebef8b0e638e4166ad52db879beb2ffc5" + integrity 
sha512-1I+1qpDt4idfgLQG+BNWmrqku+7/2bi5nLf4YwF8y8zXvmfiTBY3PV3ZibfrjBueCByROpuBjLLFCajqkgYoLQ== + +minisearch@^6.3.0: + version "6.3.0" + resolved "https://registry.yarnpkg.com/minisearch/-/minisearch-6.3.0.tgz#985a2f1ca3c73c2d65af94f0616bfe57164b0b6b" + integrity sha512-ihFnidEeU8iXzcVHy74dhkxh/dn8Dc08ERl0xwoMMGqp4+LvRSCgicb+zGqWthVokQKvCSxITlh3P08OzdTYCQ== + +nanoid@^3.3.7: + version "3.3.7" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.7.tgz#d0c301a691bc8d54efa0a2226ccf3fe2fd656bd8" + integrity sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g== + +picocolors@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c" + integrity sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ== + +postcss@^8.4.35, postcss@^8.4.36: + version "8.4.38" + resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.38.tgz#b387d533baf2054288e337066d81c6bee9db9e0e" + integrity sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A== + dependencies: + nanoid "^3.3.7" + picocolors "^1.0.0" + source-map-js "^1.2.0" + +preact@^10.0.0: + version "10.20.1" + resolved "https://registry.yarnpkg.com/preact/-/preact-10.20.1.tgz#1bc598ab630d8612978f7533da45809a8298542b" + integrity sha512-JIFjgFg9B2qnOoGiYMVBtrcFxHqn+dNXbq76bVmcaHYJFYR4lW67AOcXgAYQQTDYXDOg/kTZrKPNCdRgJ2UJmw== + +rollup@^4.13.0: + version "4.13.1" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.13.1.tgz#5bd6d84eafd60280487085b8bf9c91679571005a" + integrity sha512-hFi+fU132IvJ2ZuihN56dwgpltpmLZHZWsx27rMCTZ2sYwrqlgL5sECGy1eeV2lAihD8EzChBVVhsXci0wD4Tg== + dependencies: + "@types/estree" "1.0.5" + optionalDependencies: + "@rollup/rollup-android-arm-eabi" "4.13.1" + "@rollup/rollup-android-arm64" "4.13.1" + "@rollup/rollup-darwin-arm64" "4.13.1" + "@rollup/rollup-darwin-x64" "4.13.1" + 
"@rollup/rollup-linux-arm-gnueabihf" "4.13.1" + "@rollup/rollup-linux-arm64-gnu" "4.13.1" + "@rollup/rollup-linux-arm64-musl" "4.13.1" + "@rollup/rollup-linux-riscv64-gnu" "4.13.1" + "@rollup/rollup-linux-s390x-gnu" "4.13.1" + "@rollup/rollup-linux-x64-gnu" "4.13.1" + "@rollup/rollup-linux-x64-musl" "4.13.1" + "@rollup/rollup-win32-arm64-msvc" "4.13.1" + "@rollup/rollup-win32-ia32-msvc" "4.13.1" + "@rollup/rollup-win32-x64-msvc" "4.13.1" + fsevents "~2.3.2" + +shikiji-core@0.9.19, shikiji-core@^0.9.17: + version "0.9.19" + resolved "https://registry.yarnpkg.com/shikiji-core/-/shikiji-core-0.9.19.tgz#227975e998eb2a579cf83de30977762be3802507" + integrity sha512-AFJu/vcNT21t0e6YrfadZ+9q86gvPum6iywRyt1OtIPjPFe25RQnYJyxHQPMLKCCWA992TPxmEmbNcOZCAJclw== + +shikiji-transformers@^0.9.17: + version "0.9.19" + resolved "https://registry.yarnpkg.com/shikiji-transformers/-/shikiji-transformers-0.9.19.tgz#23e629804d5f20332712f44f3907c03ce39052af" + integrity sha512-lGLI7Z8frQrIBbhZ74/eiJtxMoCQRbpaHEB+gcfvdIy+ZFaAtXncJGnc52932/UET+Y4GyKtwwC/vjWUCp+c/Q== + dependencies: + shikiji "0.9.19" + +shikiji@0.9.19, shikiji@^0.9.17: + version "0.9.19" + resolved "https://registry.yarnpkg.com/shikiji/-/shikiji-0.9.19.tgz#351a32b291a04cf9a6b69933f8044fe135b70f6f" + integrity sha512-Kw2NHWktdcdypCj1GkKpXH4o6Vxz8B8TykPlPuLHOGSV8VkhoCLcFOH4k19K4LXAQYRQmxg+0X/eM+m2sLhAkg== + dependencies: + shikiji-core "0.9.19" + +source-map-js@^1.0.2, source-map-js@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.2.0.tgz#16b809c162517b5b8c3e7dcd315a2a5c2612b2af" + integrity sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg== + +tabbable@^6.2.0: + version "6.2.0" + resolved "https://registry.yarnpkg.com/tabbable/-/tabbable-6.2.0.tgz#732fb62bc0175cfcec257330be187dcfba1f3b97" + integrity sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew== + +undici-types@~5.26.4: + version 
"5.26.5" + resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617" + integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA== + +vite@^5.0.11: + version "5.2.6" + resolved "https://registry.yarnpkg.com/vite/-/vite-5.2.6.tgz#fc2ce309e0b4871e938cb0aca3b96c422c01f222" + integrity sha512-FPtnxFlSIKYjZ2eosBQamz4CbyrTizbZ3hnGJlh/wMtCrlp1Hah6AzBLjGI5I2urTfNnpovpHdrL6YRuBOPnCA== + dependencies: + esbuild "^0.20.1" + postcss "^8.4.36" + rollup "^4.13.0" + optionalDependencies: + fsevents "~2.3.3" + +vitepress@1.0.0-rc.36: + version "1.0.0-rc.36" + resolved "https://registry.yarnpkg.com/vitepress/-/vitepress-1.0.0-rc.36.tgz#78f1c19538d65ac4e69c44bea77579f2cccf5887" + integrity sha512-2z4dpM9PplN/yvTifhavOIAazlCR6OJ5PvLoRbc+7LdcFeIlCsuDGENLX4HjMW18jQZF5/j7++PNqdBfeazxUA== + dependencies: + "@docsearch/css" "^3.5.2" + "@docsearch/js" "^3.5.2" + "@types/markdown-it" "^13.0.7" + "@vitejs/plugin-vue" "^5.0.2" + "@vue/devtools-api" "^6.5.1" + "@vueuse/core" "^10.7.1" + "@vueuse/integrations" "^10.7.1" + focus-trap "^7.5.4" + mark.js "8.11.1" + minisearch "^6.3.0" + shikiji "^0.9.17" + shikiji-core "^0.9.17" + shikiji-transformers "^0.9.17" + vite "^5.0.11" + vue "^3.4.5" + +vue-demi@>=0.14.7: + version "0.14.7" + resolved "https://registry.yarnpkg.com/vue-demi/-/vue-demi-0.14.7.tgz#8317536b3ef74c5b09f268f7782e70194567d8f2" + integrity sha512-EOG8KXDQNwkJILkx/gPcoL/7vH+hORoBaKgGe+6W7VFMvCYJfmF2dGbvgDroVnI8LU7/kTu8mbjRZGBU1z9NTA== + +vue@^3.4.5: + version "3.4.21" + resolved "https://registry.yarnpkg.com/vue/-/vue-3.4.21.tgz#69ec30e267d358ee3a0ce16612ba89e00aaeb731" + integrity sha512-5hjyV/jLEIKD/jYl4cavMcnzKwjMKohureP8ejn3hhEjwhWIhWeuzL2kJAjzl/WyVsgPY56Sy4Z40C3lVshxXA== + dependencies: + "@vue/compiler-dom" "3.4.21" + "@vue/compiler-sfc" "3.4.21" + "@vue/runtime-dom" "3.4.21" + "@vue/server-renderer" "3.4.21" + "@vue/shared" "3.4.21"