awesome-japanese-nlp-resources.json

{
    "version": "1.0.0",
    "contents": {
        "Python library": {
            "https://github.com/WorksApplications/sudachi.rs": {
                "sub_category": "Morphology analysis",
                "repository_name": "sudachi.rs",
                "user_name": "WorksApplications",
                "description": "SudachiPy 0.6* and above are developed as Sudachi.rs."
            },
            "https://github.com/mocobeta/janome": {
                "sub_category": "Morphology analysis",
                "repository_name": "janome",
                "user_name": "mocobeta",
                "description": "Japanese morphological analysis engine written in pure Python"
            },
            "https://github.com/SamuraiT/mecab-python3": {
                "sub_category": "Morphology analysis",
                "repository_name": "mecab-python3",
                "user_name": "SamuraiT",
                "description": "mecab-python. You can find the original version here: http://taku910.github.io/mecab/"
            },
            "https://github.com/ikegami-yukino/mecab": {
                "sub_category": "Morphology analysis",
                "repository_name": "mecab",
                "user_name": "ikegami-yukino",
                "description": "This repository is for building Windows 64-bit MeCab binary and improving MeCab Python binding."
            },
            "https://github.com/polm/fugashi": {
                "sub_category": "Morphology analysis",
                "repository_name": "fugashi",
                "user_name": "polm",
                "description": "A Cython MeCab wrapper for fast, pythonic Japanese tokenization and morphological analysis."
            },
            "https://github.com/taishi-i/nagisa": {
                "sub_category": "Morphology analysis",
                "repository_name": "nagisa",
                "user_name": "taishi-i",
                "description": "A Japanese tokenizer based on recurrent neural networks"
            },
            "https://github.com/ku-nlp/pyknp": {
                "sub_category": "Morphology analysis",
                "repository_name": "pyknp",
                "user_name": "ku-nlp",
                "description": "A Python Module for JUMAN++/KNP"
            },
            "https://github.com/chezou/Mykytea-python": {
                "sub_category": "Morphology analysis",
                "repository_name": "Mykytea-python",
                "user_name": "chezou",
                "description": "Python wrapper for KyTea"
            },
            "https://github.com/himkt/konoha": {
                "sub_category": "Morphology analysis",
                "repository_name": "konoha",
                "user_name": "himkt",
                "description": "Konoha: Simple wrapper of Japanese Tokenizers"
            },
            "https://github.com/buruzaemon/natto-py": {
                "sub_category": "Morphology analysis",
                "repository_name": "natto-py",
                "user_name": "buruzaemon",
                "description": "natto-py combines the Python programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language."
            },
            "https://github.com/ikegami-yukino/rakutenma-python": {
                "sub_category": "Morphology analysis",
                "repository_name": "rakutenma-python",
                "user_name": "ikegami-yukino",
                "description": "Rakuten MA (Python version)"
            },
            "https://github.com/daac-tools/python-vaporetto": {
                "sub_category": "Morphology analysis",
                "repository_name": "python-vaporetto",
                "user_name": "daac-tools",
                "description": "Vaporetto is a fast and lightweight pointwise prediction based tokenizer. This is a Python wrapper for Vaporetto."
            },
            "https://github.com/mkartawijaya/dango": {
                "sub_category": "Morphology analysis",
                "repository_name": "dango",
                "user_name": "mkartawijaya",
                "description": "An easy to use tokenizer for Japanese text, aimed at language learners and non-linguists"
            },
            "https://github.com/ku-nlp/rhoknp": {
                "sub_category": "Morphology analysis",
                "repository_name": "rhoknp",
                "user_name": "ku-nlp",
                "description": "Yet another Python binding for Juman++/KNP"
            },
            "https://github.com/daac-tools/python-vibrato": {
                "sub_category": "Morphology analysis",
                "repository_name": "python-vibrato",
                "user_name": "daac-tools",
                "description": "Viterbi-based accelerated tokenizer (Python wrapper)"
            },
            "https://github.com/megagonlabs/ginza": {
                "sub_category": "Parsing",
                "repository_name": "ginza",
                "user_name": "megagonlabs",
                "description": "A Japanese NLP Library using spaCy as framework based on Universal Dependencies"
            },
            "https://github.com/ikegami-yukino/cabocha": {
                "sub_category": "Parsing",
                "repository_name": "cabocha",
                "user_name": "ikegami-yukino",
                "description": "Yet Another Japanese Dependency Structure Analyzer"
            },
            "https://github.com/KoichiYasuoka/UniDic2UD": {
                "sub_category": "Parsing",
                "repository_name": "UniDic2UD",
                "user_name": "KoichiYasuoka",
                "description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese"
            },
            "https://github.com/PKSHATechnology-Research/camphr": {
                "sub_category": "Parsing",
                "repository_name": "camphr",
                "user_name": "PKSHATechnology-Research",
                "description": "NLP library for creating pipeline components"
            },
            "https://github.com/KoichiYasuoka/SuPar-UniDic": {
                "sub_category": "Parsing",
                "repository_name": "SuPar-UniDic",
                "user_name": "KoichiYasuoka",
                "description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese with BERT models"
            },
            "https://github.com/masashi-y/depccg": {
                "sub_category": "Parsing",
                "repository_name": "depccg",
                "user_name": "masashi-y",
                "description": "A* CCG Parser with a Supertag and Dependency Factored Model"
            },
            "https://github.com/ku-nlp/bertknp": {
                "sub_category": "Parsing",
                "repository_name": "bertknp",
                "user_name": "ku-nlp",
                "description": "A Japanese dependency parser based on BERT"
            },
            "https://github.com/KoichiYasuoka/esupar": {
                "sub_category": "Parsing",
                "repository_name": "esupar",
                "user_name": "KoichiYasuoka",
                "description": "Tokenizer POS-Tagger and Dependency-parser with BERT/RoBERTa/DeBERTa models for Japanese and other languages"
            },
            "https://github.com/passaglia/yomikata": {
                "sub_category": "Parsing",
                "repository_name": "yomikata",
                "user_name": "passaglia",
                "description": "Heteronym disambiguation library using a fine-tuned BERT model."
            },
            "https://github.com/miurahr/pykakasi": {
                "sub_category": "Converter",
                "repository_name": "pykakasi",
                "user_name": "miurahr",
                "description": "Lightweight converter from Japanese Kana-kanji sentences into Kana-Roman."
            },
            "https://github.com/polm/cutlet": {
                "sub_category": "Converter",
                "repository_name": "cutlet",
                "user_name": "polm",
                "description": "Japanese to romaji converter in Python"
            },
            "https://github.com/shihono/alphabet2kana": {
                "sub_category": "Converter",
                "repository_name": "alphabet2kana",
                "user_name": "shihono",
                "description": "Convert English alphabet to Katakana"
            },
            "https://github.com/Greatdane/Convert-Numbers-to-Japanese": {
                "sub_category": "Converter",
                "repository_name": "Convert-Numbers-to-Japanese",
                "user_name": "Greatdane",
                "description": "Converts Arabic numerals, or 'western' style numbers, to a Japanese context."
            },
            "https://github.com/ikegami-yukino/mozcpy": {
                "sub_category": "Converter",
                "repository_name": "mozcpy",
                "user_name": "ikegami-yukino",
                "description": "Mozc for Python: Kana-Kanji converter"
            },
            "https://github.com/tachi-hi/jamorasep": {
                "sub_category": "Converter",
                "repository_name": "jamorasep",
                "user_name": "tachi-hi",
                "description": "Japanese text parser to separate Hiragana/Katakana string into morae (syllables)."
            },
            "https://github.com/korguchi/text2phoneme": {
                "sub_category": "Converter",
                "repository_name": "text2phoneme",
                "user_name": "korguchi",
                "description": "日本語文を音素列へ変換するスクリプト"
            },
            "https://github.com/ikegami-yukino/neologdn": {
                "sub_category": "Preprocessor",
                "repository_name": "neologdn",
                "user_name": "ikegami-yukino",
                "description": "Japanese text normalizer for mecab-neologd"
            },
            "https://github.com/ikegami-yukino/jaconv": {
                "sub_category": "Preprocessor",
                "repository_name": "jaconv",
                "user_name": "ikegami-yukino",
                "description": "Pure-Python Japanese character interconverter for Hiragana, Katakana, Hankaku, and Zenkaku"
            },
            "https://github.com/studio-ousia/mojimoji": {
                "sub_category": "Preprocessor",
                "repository_name": "mojimoji",
                "user_name": "studio-ousia",
                "description": "A fast converter between Japanese hankaku and zenkaku characters"
            },
            "https://github.com/ku-nlp/text-cleaning": {
                "sub_category": "Preprocessor",
                "repository_name": "text-cleaning",
                "user_name": "ku-nlp",
                "description": "A powerful text cleaner for Japanese web texts"
            },
            "https://github.com/HojiChar/HojiChar": {
                "sub_category": "Preprocessor",
                "repository_name": "HojiChar",
                "user_name": "HojiChar",
                "description": "複数の前処理を構成して管理するテキスト前処理ツール"
            },
            "https://github.com/megagonlabs/bunkai": {
                "sub_category": "Sentence spliter",
                "repository_name": "bunkai",
                "user_name": "megagonlabs",
                "description": "Sentence boundary disambiguation tool for Japanese texts (日本語文境界判定器)"
            },
            "https://github.com/hppRC/japanese-sentence-breaker": {
                "sub_category": "Sentence spliter",
                "repository_name": "japanese-sentence-breaker",
                "user_name": "hppRC",
                "description": "Japanese Sentence Breaker"
            },
            "https://github.com/ikegami-yukino/sengiri": {
                "sub_category": "Sentence spliter",
                "repository_name": "sengiri",
                "user_name": "ikegami-yukino",
                "description": "Yet another sentence-level tokenizer for the Japanese text"
            },
            "https://github.com/google/budoux": {
                "sub_category": "Sentence spliter",
                "repository_name": "budoux",
                "user_name": "google",
                "description": "Standalone. Small. Language-neutral. BudouX is the successor to Budou, the machine learning powered line break organizer tool."
            },
            "https://github.com/wwwcojp/ja_sentence_segmenter": {
                "sub_category": "Sentence spliter",
                "repository_name": "ja_sentence_segmenter",
                "user_name": "wwwcojp",
                "description": "japanese sentence segmentation library for python"
            },
            "https://github.com/mkartawijaya/hasami": {
                "sub_category": "Sentence spliter",
                "repository_name": "hasami",
                "user_name": "mkartawijaya",
                "description": "A tool to perform sentence segmentation on Japanese text"
            },
            "https://github.com/alinear-corp/kuzukiri": {
                "sub_category": "Sentence spliter",
                "repository_name": "kuzukiri",
                "user_name": "alinear-corp",
                "description": "Japanese Text Segmenter for Python written in Rust"
            },
            "https://github.com/hkiyomaru/ja-senter-benchmark": {
                "sub_category": "Sentence spliter",
                "repository_name": "ja-senter-benchmark",
                "user_name": "hkiyomaru",
                "description": "Comparison of Japanese Sentence Segmentation Tools"
            },
            "https://github.com/ikegami-yukino/oseti": {
                "sub_category": "Sentiment analysis",
                "repository_name": "oseti",
                "user_name": "ikegami-yukino",
                "description": "Dictionary based Sentiment Analysis for Japanese"
            },
            "https://github.com/liaoziyang/negapoji": {
                "sub_category": "Sentiment analysis",
                "repository_name": "negapoji",
                "user_name": "liaoziyang",
                "description": "Japanese negative positive classification.日本語文書のネガポジを判定。"
            },
            "https://github.com/ikegami-yukino/pymlask": {
                "sub_category": "Sentiment analysis",
                "repository_name": "pymlask",
                "user_name": "ikegami-yukino",
                "description": "Emotion analyzer for Japanese text"
            },
            "https://github.com/Hironsan/asari": {
                "sub_category": "Sentiment analysis",
                "repository_name": "asari",
                "user_name": "Hironsan",
                "description": "Japanese sentiment analyzer implemented in Python."
            },
            "https://github.com/MorinoseiMorizo/jparacrawl-finetune": {
                "sub_category": "Machine translation",
                "repository_name": "jparacrawl-finetune",
                "user_name": "MorinoseiMorizo",
                "description": "An example usage of JParaCrawl pre-trained Neural Machine Translation (NMT) models."
            },
            "https://github.com/Mao-KU/JASS": {
                "sub_category": "Machine translation",
                "repository_name": "JASS",
                "user_name": "Mao-KU",
                "description": "JASS: Japanese-specific Sequence to Sequence Pre-training for Neural Machine Translation (LREC2020) & Linguistically Driven Multi-Task Pre-Training for Low-Resource Neural Machine Translation (ACM TALLIP)"
            },
            "https://github.com/cl-tohoku/PheMT": {
                "sub_category": "Machine translation",
                "repository_name": "PheMT",
                "user_name": "cl-tohoku",
                "description": "A phenomenon-wise evaluation dataset for Japanese-English machine translation robustness. The dataset is based on the MTNT dataset, with additional annotations of four linguistic phenomena; Proper Noun, Abbreviated Noun, Colloquial Expression, and Variant. COLING 2020."
            },
            "https://github.com/ku-nlp/VISA": {
                "sub_category": "Machine translation",
                "repository_name": "VISA",
                "user_name": "ku-nlp",
                "description": "An ambiguous subtitles dataset for visual scene-aware machine translation"
            },
            "https://github.com/chakki-works/namaco": {
                "sub_category": "Named entity recognition",
                "repository_name": "namaco",
                "user_name": "chakki-works",
                "description": "Character Based Named Entity Recognition."
            },
            "https://github.com/chakki-works/entitypedia": {
                "sub_category": "Named entity recognition",
                "repository_name": "entitypedia",
                "user_name": "chakki-works",
                "description": "Entitypedia is an Extended Named Entity Dictionary from Wikipedia."
            },
            "https://github.com/ken11/noyaki": {
                "sub_category": "Named entity recognition",
                "repository_name": "noyaki",
                "user_name": "ken11",
                "description": "Converts character span label information to tokenized text-based label information."
            },
            "https://github.com/ken11/bert-japanese-ner-finetuning": {
                "sub_category": "Named entity recognition",
                "repository_name": "bert-japanese-ner-finetuning",
                "user_name": "ken11",
                "description": "Code to perform finetuning of the BERT model. BERTモデルのファインチューニングで固有表現抽出用タスクのモデルを作成・使用するサンプルです"
            },
            "https://github.com/aih-uth/joint-information-extraction-hs": {
                "sub_category": "Named entity recognition",
                "repository_name": "joint-information-extraction-hs",
                "user_name": "aih-uth",
                "description": "詳細なアノテーション基準に基づく症例報告コーパスからの固有表現及び関係の抽出精度の推論を行うコード"
            },
            "https://github.com/kha-white/manga-ocr": {
                "sub_category": "OCR",
                "repository_name": "manga-ocr",
                "user_name": "kha-white",
                "description": "Optical character recognition for Japanese text, with the main focus being Japanese manga"
            },
            "https://github.com/kha-white/mokuro": {
                "sub_category": "OCR",
                "repository_name": "mokuro",
                "user_name": "kha-white",
                "description": "Read Japanese manga inside browser with selectable text."
            },
            "https://github.com/yas-sim/handwritten-japanese-ocr": {
                "sub_category": "OCR",
                "repository_name": "handwritten-japanese-ocr",
                "user_name": "yas-sim",
                "description": "Handwritten Japanese OCR demo using touch panel to draw the input text using Intel OpenVINO toolkit"
            },
            "https://github.com/tanreinama/OCR_Japanease": {
                "sub_category": "OCR",
                "repository_name": "OCR_Japanease",
                "user_name": "tanreinama",
                "description": "日本語OCR"
            },
            "https://github.com/ndl-lab/ndlocr_cli": {
                "sub_category": "OCR",
                "repository_name": "ndlocr_cli",
                "user_name": "ndl-lab",
                "description": "NDLOCRのアプリケーション"
            },
            "https://github.com/clovaai/donut": {
                "sub_category": "OCR",
                "repository_name": "donut",
                "user_name": "clovaai",
                "description": "Official Implementation of OCR-free Document Understanding Transformer (Donut) and Synthetic Document Generator (SynthDoG), ECCV 2022"
            },
            "https://github.com/ttop32/JMTrans": {
                "sub_category": "OCR",
                "repository_name": "JMTrans",
                "user_name": "ttop32",
                "description": "get japanese manga from url to translate manga image"
            },
            "https://github.com/ducanh841988/Kindai-OCR": {
                "sub_category": "OCR",
                "repository_name": "Kindai-OCR",
                "user_name": "ducanh841988",
                "description": "OCR system for recognizing modern Japanese magazines"
            },
            "https://github.com/ndl-lab/text_recognition": {
                "sub_category": "OCR",
                "repository_name": "text_recognition",
                "user_name": "ndl-lab",
                "description": "NDLOCR用テキスト認識モジュール"
            },
            "https://github.com/yahoojapan/JGLUE": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "JGLUE",
                "user_name": "yahoojapan",
                "description": "JGLUE: Japanese General Language Understanding Evaluation"
            },
            "https://github.com/megagonlabs/ginza-transformers": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "ginza-transformers",
                "user_name": "megagonlabs",
                "description": "Use custom tokenizers in spacy-transformers"
            },
            "https://github.com/Jinyamyzk/t5_japanese_dialogue_generation": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "t5_japanese_dialogue_generation",
                "user_name": "Jinyamyzk",
                "description": "T5による会話生成"
            },
            "https://github.com/Masao-Taketani/japanese_text_classification": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "japanese_text_classification",
                "user_name": "Masao-Taketani",
                "description": "To investigate various DNN text classifiers including MLP, CNN, RNN, BERT approaches."
            },
            "https://github.com/izuna385/Japanese-BERT-Sentiment-Analyzer": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "Japanese-BERT-Sentiment-Analyzer",
                "user_name": "izuna385",
                "description": "Deploying sentiment analysis server with FastAPI and BERT"
            },
            "https://github.com/minhpqn/jmlm_scoring": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "jmlm_scoring",
                "user_name": "minhpqn",
                "description": "Masked Language Model-based Scoring for Japanese and Vietnamese"
            },
            "https://github.com/shunk031/allennlp-shiba-model": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "allennlp-shiba-model",
                "user_name": "shunk031",
                "description": "AllenNLP integration for Shiba: Japanese CANINE model"
            },
            "https://github.com/shihono/evaluate_japanese_w2v": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "evaluate_japanese_w2v",
                "user_name": "shihono",
                "description": "script to evaluate pre-trained Japanese word2vec model on Japanese similarity dataset"
            },
            "https://github.com/jonnyli1125/gector-ja": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "gector-ja",
                "user_name": "jonnyli1125",
                "description": "BERT-based GEC tagging for Japanese"
            },
            "https://github.com/tanreinama/Japanese-BPEEncoder": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "Japanese-BPEEncoder",
                "user_name": "tanreinama",
                "description": "Japanese-BPEEncoder"
            },
            "https://github.com/tanreinama/Japanese-BPEEncoder_V2": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "Japanese-BPEEncoder_V2",
                "user_name": "tanreinama",
                "description": "Japanese-BPEEncoder Version 2"
            },
            "https://github.com/youichiro/transformer-copy": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "transformer-copy",
                "user_name": "youichiro",
                "description": "日本語文法誤り訂正ツール"
            },
            "https://github.com/rinnakk/japanese-stable-diffusion": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "japanese-stable-diffusion",
                "user_name": "rinnakk",
                "description": "Japanese Stable Diffusion is a Japanese specific latent text-to-image diffusion model capable of generating photo-realistic images given any text input."
            },
            "https://github.com/taishi-i/nagisa_bert": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "nagisa_bert",
                "user_name": "taishi-i",
                "description": "A BERT model for nagisa"
            },
            "https://github.com/rinnakk/prefix-tuning-gpt": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "prefix-tuning-gpt",
                "user_name": "rinnakk",
                "description": "Example code for prefix-tuning GPT/GPT-NeoX models and for inference with trained prefixes"
            },
            "https://github.com/nobu-g/JGLUE-benchmark": {
                "sub_category": "Tool for pretrained models",
                "repository_name": "JGLUE-benchmark",
                "user_name": "nobu-g",
                "description": "Training and evaluation scripts for JGLUE, a Japanese language understanding benchmark"
            },
            "https://github.com/rskmoi/namedivider-python": {
                "sub_category": null,
                "repository_name": "namedivider-python",
                "user_name": "rskmoi",
                "description": "A tool for dividing the Japanese full name into a family name and a given name."
            },
            "https://github.com/ikegami-yukino/asa-python": {
                "sub_category": null,
                "repository_name": "asa-python",
                "user_name": "ikegami-yukino",
                "description": "Python wrapper for ASA, the Japanese Argument Structure Analyzer"
            },
            "https://github.com/Takeuchi-Lab-LM/python_asa": {
                "sub_category": null,
                "repository_name": "python_asa",
                "user_name": "Takeuchi-Lab-LM",
                "description": "python版日本語意味役割付与システム（ASA）"
            },
            "https://github.com/taishi-i/toiro": {
                "sub_category": null,
                "repository_name": "toiro",
                "user_name": "taishi-i",
                "description": "A comparison tool of Japanese tokenizers"
            },
            "https://github.com/yagays/ja-timex": {
                "sub_category": null,
                "repository_name": "ja-timex",
                "user_name": "yagays",
                "description": "自然言語で書かれた時間情報表現を抽出/規格化するルールベースの解析器"
            },
            "https://github.com/Kensuke-Mitsuzawa/JapaneseTokenizers": {
                "sub_category": null,
                "repository_name": "JapaneseTokenizers",
                "user_name": "Kensuke-Mitsuzawa",
                "description": "Aims to make Japanese tokenizers as easy to use as possible"
            },
            "https://github.com/kajyuuen/daaja": {
                "sub_category": null,
                "repository_name": "daaja",
                "user_name": "kajyuuen",
                "description": "This repository has implementations of data augmentation for NLP for Japanese."
            },
            "https://github.com/accel-brain/accel-brain-code": {
                "sub_category": null,
                "repository_name": "accel-brain-code",
                "user_name": "accel-brain",
                "description": "The purpose of this repository is to make prototypes as case study in the context of proof of concept(PoC) and research and development(R&D) that I have written in my website. The main research topics are Auto-Encoders in relation to the representation learning, the statistical machine learning for energy-based models, adversarial generation net…"
            },
            "https://github.com/ku-nlp/kyoto-reader": {
                "sub_category": null,
                "repository_name": "kyoto-reader",
                "user_name": "ku-nlp",
                "description": "A processor for KyotoCorpus, KWDLC, and AnnotatedFKCCorpus"
            },
            "https://github.com/takapy0210/nlplot": {
                "sub_category": null,
                "repository_name": "nlplot",
                "user_name": "takapy0210",
                "description": "Visualization Module for Natural Language Processing"
            },
            "https://github.com/kanjirz50/rake-ja": {
                "sub_category": null,
                "repository_name": "rake-ja",
                "user_name": "kanjirz50",
                "description": "Rapid Automatic Keyword Extraction algorithm for Japanese"
            },
            "https://github.com/izuna385/jel": {
                "sub_category": null,
                "repository_name": "jel",
                "user_name": "izuna385",
                "description": "Japanese Entity Linker."
            },
            "https://github.com/sociocom/MedNER-J": {
                "sub_category": null,
                "repository_name": "MedNER-J",
                "user_name": "sociocom",
                "description": "Latest version of MedEX/J (Japanese disease name extractor)"
            },
            "https://github.com/ikegami-yukino/zunda-python": {
                "sub_category": null,
                "repository_name": "zunda-python",
                "user_name": "ikegami-yukino",
                "description": "Zunda: Japanese Enhanced Modality Analyzer client for Python."
            },
            "https://github.com/cl-tohoku/AIO2_DPR_baseline": {
                "sub_category": null,
                "repository_name": "AIO2_DPR_baseline",
                "user_name": "cl-tohoku",
                "description": "https://www.nlp.ecei.tohoku.ac.jp/projects/aio/"
            },
            "https://github.com/cl-tohoku/showcase": {
                "sub_category": null,
                "repository_name": "showcase",
                "user_name": "cl-tohoku",
                "description": "A PyTorch implementation of the Japanese Predicate-Argument Structure (PAS) analyser presented in the paper of Matsubayashi & Inui (2018) with some improvements."
            },
            "https://github.com/rixwew/darts-clone-python": {
                "sub_category": null,
                "repository_name": "darts-clone-python",
                "user_name": "rixwew",
                "description": "Darts-clone python binding"
            },
            "https://github.com/megagonlabs/jrte-corpus_example": {
                "sub_category": null,
                "repository_name": "jrte-corpus_example",
                "user_name": "megagonlabs",
                "description": "Example codes for Japanese Realistic Textual Entailment Corpus"
            },
            "https://github.com/megagonlabs/desuwa": {
                "sub_category": null,
                "repository_name": "desuwa",
                "user_name": "megagonlabs",
                "description": "Feature annotator to morphemes and phrases based on KNP rule files (pure-Python)"
            },
            "https://github.com/Hironsan/HotPepperGourmetDialogue": {
                "sub_category": null,
                "repository_name": "HotPepperGourmetDialogue",
                "user_name": "Hironsan",
                "description": "Restaurant Search System through Dialogue in Japanese."
            },
            "https://github.com/upura/nlp-recipes-ja": {
                "sub_category": null,
                "repository_name": "nlp-recipes-ja",
                "user_name": "upura",
                "description": "Samples codes for natural language processing in Japanese"
            },
            "https://github.com/olsgaard/Japanese_nlp_scripts": {
                "sub_category": null,
                "repository_name": "Japanese_nlp_scripts",
                "user_name": "olsgaard",
                "description": "Small example scripts for working with Japanese texts in Python"
            },
            "https://github.com/sociocom/DNorm-J": {
                "sub_category": null,
                "repository_name": "DNorm-J",
                "user_name": "sociocom",
                "description": "Japanese version of DNorm"
            },
            "https://github.com/ku-nlp/pyknp-eventgraph": {
                "sub_category": null,
                "repository_name": "pyknp-eventgraph",
                "user_name": "ku-nlp",
                "description": "EventGraph is a development platform for high-level NLP applications in Japanese."
            },
            "https://github.com/ku-nlp/ishi": {
                "sub_category": null,
                "repository_name": "ishi",
                "user_name": "ku-nlp",
                "description": "Ishi: A volition classifier for Japanese"
            },
            "https://github.com/musyoku/python-npylm": {
                "sub_category": null,
                "repository_name": "python-npylm",
                "user_name": "musyoku",
                "description": "ベイズ階層言語モデルによる教師なし形態素解析"
            },
            "https://github.com/musyoku/python-npycrf": {
                "sub_category": null,
                "repository_name": "python-npycrf",
                "user_name": "musyoku",
                "description": "条件付確率場とベイズ階層言語モデルの統合による半教師あり形態素解析"
            },
            "https://github.com/musyoku/unsupervised-pos-tagging": {
                "sub_category": null,
                "repository_name": "unsupervised-pos-tagging",
                "user_name": "musyoku",
                "description": "教師なし品詞タグ推定"
            },
            "https://github.com/cocodrips/negima": {
                "sub_category": null,
                "repository_name": "negima",
                "user_name": "cocodrips",
                "description": "Negima is a Python package to extract phrases in Japanese text by using the part-of-speeches based rules you defined."
            },
            "https://github.com/neilctwu/YouyakuMan": {
                "sub_category": null,
                "repository_name": "YouyakuMan",
                "user_name": "neilctwu",
                "description": "Extractive summarizer using BertSum as summarization model"
            },
            "https://github.com/takumakanari/japanese-numbers-python": {
                "sub_category": null,
                "repository_name": "japanese-numbers-python",
                "user_name": "takumakanari",
                "description": "A parser for Japanese number (Kanji, arabic) in the natural language."
            },
            "https://github.com/itayperl/kantan": {
                "sub_category": null,
                "repository_name": "kantan",
                "user_name": "itayperl",
                "description": "Lookup japanese words by radical patterns"
            },
            "https://github.com/knok/make-meidai-dialogue": {
                "sub_category": null,
                "repository_name": "make-meidai-dialogue",
                "user_name": "knok",
                "description": "Get Japanese dialogue corpus"
            },
            "https://github.com/ryuryukke/japanese_summarizer": {
                "sub_category": null,
                "repository_name": "japanese_summarizer",
                "user_name": "ryuryukke",
                "description": "A summarizer for Japanese articles."
            },
            "https://github.com/letuananh/chirptext": {
                "sub_category": null,
                "repository_name": "chirptext",
                "user_name": "letuananh",
                "description": "ChirpText is a collection of text processing tools for Python."
            },
            "https://github.com/alvations/yubin": {
                "sub_category": null,
                "repository_name": "yubin",
                "user_name": "alvations",
                "description": "Japanese Address Munger"
            },
            "https://github.com/hppRC/jawiki-cleaner": {
                "sub_category": null,
                "repository_name": "jawiki-cleaner",
                "user_name": "hppRC",
                "description": "Japanese Wikipedia Cleaner"
            },
            "https://github.com/iory/japanese2phoneme": {
                "sub_category": null,
                "repository_name": "japanese2phoneme",
                "user_name": "iory",
                "description": "A python library to convert Japanese to phoneme."
            },
            "https://github.com/arusl/anlp_nlp2021_d3-1": {
                "sub_category": null,
                "repository_name": "anlp_nlp2021_d3-1",
                "user_name": "arusl",
                "description": "This repository contains codes related to the experiments in \"An Experimental Evaluation of Japanese Tokenizers for Sentiment-Based Text Classification\""
            },
            "https://github.com/shibuiwilliam/aozora_classification": {
                "sub_category": null,
                "repository_name": "aozora_classification",
                "user_name": "shibuiwilliam",
                "description": "About"
            },
            "https://github.com/borh/aozora-corpus-generator": {
                "sub_category": null,
                "repository_name": "aozora-corpus-generator",
                "user_name": "borh",
                "description": "Generates plain or tokenized text files from the Aozora Bunko"
            },
            "https://github.com/jiali-ms/JLM": {
                "sub_category": null,
                "repository_name": "JLM",
                "user_name": "jiali-ms",
                "description": "A fast LSTM Language Model for large vocabulary language like Japanese and Chinese"
            },
            "https://github.com/m3yrin/NTM": {
                "sub_category": null,
                "repository_name": "NTM",
                "user_name": "m3yrin",
                "description": "Testing of Neural Topic Modeling for Japanese articles"
            },
            "https://github.com/Machine-Learning-Tokyo/EN-JP-ML-Lexicon": {
                "sub_category": null,
                "repository_name": "EN-JP-ML-Lexicon",
                "user_name": "Machine-Learning-Tokyo",
                "description": "This is a English-Japanese lexicon for Machine Learning and Deep Learning terminology."
            },
            "https://github.com/discus0434/text-generation": {
                "sub_category": null,
                "repository_name": "text-generation",
                "user_name": "discus0434",
                "description": "Easy-to-use scripts to fine-tune GPT-2-JA with your own texts, to generate sentences, and to tweet them automatically."
            },
            "https://github.com/yuyay/chainer_nic": {
                "sub_category": null,
                "repository_name": "chainer_nic",
                "user_name": "yuyay",
                "description": "Neural Image Caption (NIC) on chainer, its pretrained models on English and Japanese image caption datasets."
            },
            "https://github.com/JetRunner/unihan-lm": {
                "sub_category": null,
                "repository_name": "unihan-lm",
                "user_name": "JetRunner",
                "description": "The official repository for \"UnihanLM: Coarse-to-Fine Chinese-Japanese Language Model Pretraining with the Unihan Database\", AACL-IJCNLP 2020"
            },
            "https://github.com/ken11/mbart-finetuning": {
                "sub_category": null,
                "repository_name": "mbart-finetuning",
                "user_name": "ken11",
                "description": "Code to perform finetuning of the mBART model."
            },
            "https://github.com/sarulab-speech/xvector_jtubespeech": {
                "sub_category": null,
                "repository_name": "xvector_jtubespeech",
                "user_name": "sarulab-speech",
                "description": "xvector model on jtubespeech"
            },
            "https://github.com/shogo82148/TinySegmenterMaker": {
                "sub_category": null,
                "repository_name": "TinySegmenterMaker",
                "user_name": "shogo82148",
                "description": "TinySegmenter用の学習モデルを自作するためのツール．"
            },
            "https://github.com/shogo82148/Grongish": {
                "sub_category": null,
                "repository_name": "Grongish",
                "user_name": "shogo82148",
                "description": "日本語とグロンギ語の相互変換スクリプト"
            },
            "https://github.com/aocattleya/WordCloud-Japanese": {
                "sub_category": null,
                "repository_name": "WordCloud-Japanese",
                "user_name": "aocattleya",
                "description": "WordCloudでの日本語文章をMecab（形態素解析エンジン）を使用せずに形態素解析チックな表示を実現するスクリプト"
            },
            "https://github.com/hiraokusky/snark": {
                "sub_category": null,
                "repository_name": "snark",
                "user_name": "hiraokusky",
                "description": "日本語ワードネットを利用したDBアクセスライブラリ"
            },
            "https://github.com/mkan0141/toEmoji": {
                "sub_category": null,
                "repository_name": "toEmoji",
                "user_name": "mkan0141",
                "description": "日本語文を絵文字だけの文に変換するなにか"
            },
            "https://github.com/kanjirz50/termextract": {
                "sub_category": null,
                "repository_name": "termextract",
                "user_name": "kanjirz50",
                "description": "専門用語抽出アルゴリズムの実装の練習"
            },
            "https://github.com/TUT-SLP-lab/JDT-with-KenLM-scoring": {
                "sub_category": null,
                "repository_name": "JDT-with-KenLM-scoring",
                "user_name": "TUT-SLP-lab",
                "description": "Japanese-Dialog-Transformerの応答候補に対して、KenLMによるN-gram言語モデルでスコアリングし、フィルタリング若しくはリランキングを行う。"
            },
            "https://github.com/KentoW/mixture-of-unigram-model": {
                "sub_category": null,
                "repository_name": "mixture-of-unigram-model",
                "user_name": "KentoW",
                "description": "Mixture of Unigram Model and Infinite Mixture of Unigram Model in Python. (混合ユニグラムモデルと無限混合ユニグラムモデル)"
            },
            "https://github.com/KentoW/hidden-markov-model": {
                "sub_category": null,
                "repository_name": "hidden-markov-model",
                "user_name": "KentoW",
                "description": "Hidden Markov Model (HMM) and Infinite Hidden Markov Model (iHMM) in Python. (隠れマルコフモデルと無限隠れマルコフモデル)"
            },
            "https://github.com/KentoW/Ngram-language-model": {
                "sub_category": null,
                "repository_name": "Ngram-language-model",
                "user_name": "KentoW",
                "description": "Ngram language model in Python. (Nグラム言語モデル)"
            },
            "https://github.com/JeanMaximilienCadic/ASRDeepSpeech": {
                "sub_category": null,
                "repository_name": "ASRDeepSpeech",
                "user_name": "JeanMaximilienCadic",
                "description": "Automatic Speech Recognition with deepspeech2 model in pytorch with support from Zakuro AI."
            },
            "https://github.com/yohokuno/neural_ime": {
                "sub_category": null,
                "repository_name": "neural_ime",
                "user_name": "yohokuno",
                "description": "Neural IME: Neural Input Method Engine"
            },
            "https://github.com/Kyubyong/neural_japanese_transliterator": {
                "sub_category": null,
                "repository_name": "neural_japanese_transliterator",
                "user_name": "Kyubyong",
                "description": "Can neural networks transliterate Romaji into Japanese correctly?"
            },
            "https://github.com/SamuraiT/tinysegmenter": {
                "sub_category": null,
                "repository_name": "tinysegmenter",
                "user_name": "SamuraiT",
                "description": "tokenizer specified for Japanese"
            },
            "https://github.com/chck/AugLy-jp": {
                "sub_category": null,
                "repository_name": "AugLy-jp",
                "user_name": "chck",
                "description": "Data Augmentation for Japanese Text on AugLy"
            },
            "https://github.com/Mumumu4/furigana4epub": {
                "sub_category": null,
                "repository_name": "furigana4epub",
                "user_name": "Mumumu4",
                "description": "A Python script for adding furigana to Japanese epub books using Mecab and Unidic."
            },
            "https://github.com/SmashinFries/PyKatsuyou": {
                "sub_category": null,
                "repository_name": "PyKatsuyou",
                "user_name": "SmashinFries",
                "description": "Japanese verb/adjective inflections tool"
            },
            "https://github.com/t-sagara/jageocoder": {
                "sub_category": null,
                "repository_name": "jageocoder",
                "user_name": "t-sagara",
                "description": "Pure Python Japanese address geocoder"
            },
            "https://github.com/geonlp-platform/pygeonlp": {
                "sub_category": null,
                "repository_name": "pygeonlp",
                "user_name": "geonlp-platform",
                "description": "pygeonlp, A python module for geotagging Japanese texts."
            },
            "https://github.com/yoriyuki/nksnd": {
                "sub_category": null,
                "repository_name": "nksnd",
                "user_name": "yoriyuki",
                "description": "New kana-kanji conversion engine"
            },
            "https://github.com/racerandom/JaMIE": {
                "sub_category": null,
                "repository_name": "JaMIE",
                "user_name": "racerandom",
                "description": "A Japanese Medical Information Extraction Toolkit"
            },
            "https://github.com/GINK03/fasttext-vs-word2vec-on-twitter-data": {
                "sub_category": null,
                "repository_name": "fasttext-vs-word2vec-on-twitter-data",
                "user_name": "GINK03",
                "description": "fasttextとword2vecの比較と、実行スクリプト、学習スクリプトです"
            },
            "https://github.com/GINK03/minimal-search-engine": {
                "sub_category": null,
                "repository_name": "minimal-search-engine",
                "user_name": "GINK03",
                "description": "最小のサーチエンジン/PageRank/tf-idf"
            },
            "https://github.com/GINK03/5ch-analysis": {
                "sub_category": null,
                "repository_name": "5ch-analysis",
                "user_name": "GINK03",
                "description": "5chの過去ログをスクレイピングして、過去流行った単語(ex, 香具師, orz)などを追跡調査"
            },
            "https://github.com/tatHi/tweet_extructor": {
                "sub_category": null,
                "repository_name": "tweet_extructor",
                "user_name": "tatHi",
                "description": "Twitter日本語評判分析データセットのためのツイートダウンローダ"
            },
            "https://github.com/hkiyomaru/japanese-word-aggregation": {
                "sub_category": null,
                "repository_name": "japanese-word-aggregation",
                "user_name": "hkiyomaru",
                "description": "Aggregating Japanese words based on Juman++ and ConceptNet5.5"
            },
            "https://github.com/hkiyomaru/jinf": {
                "sub_category": null,
                "repository_name": "jinf",
                "user_name": "hkiyomaru",
                "description": "A Japanese inflection converter"
            },
            "https://github.com/ku-nlp/kwja": {
                "sub_category": null,
                "repository_name": "kwja",
                "user_name": "ku-nlp",
                "description": "A unified language analyzer for Japanese"
            },
            "https://github.com/Ryutaro-A/mlm-scoring-transformers": {
                "sub_category": null,
                "repository_name": "mlm-scoring-transformers",
                "user_name": "Ryutaro-A",
                "description": "Reproduced package based on Masked Language Model Scoring (ACL2020)."
            },
            "https://github.com/Japanese-Image-Captioning/ClipCap-for-Japanese": {
                "sub_category": null,
                "repository_name": "ClipCap-for-Japanese",
                "user_name": "Japanese-Image-Captioning",
                "description": "[PyTorch] ClipCap for Japanese"
            },
            "https://github.com/Japanese-Image-Captioning/SAT-for-Japanese": {
                "sub_category": null,
                "repository_name": "SAT-for-Japanese",
                "user_name": "Japanese-Image-Captioning",
                "description": "[PyTorch] Show, Attend and Tell for Japanese"
            },
            "https://github.com/cihai/cihai": {
                "sub_category": null,
                "repository_name": "cihai",
                "user_name": "cihai",
                "description": "Python library for CJK (Chinese, Japanese, and Korean) language dictionary"
            },
            "https://github.com/6gsn/marine": {
                "sub_category": null,
                "repository_name": "marine",
                "user_name": "6gsn",
                "description": "MARINE : Multi-task leaRnIng-based JapaNese accent Estimation"
            },
            "https://github.com/sarulab-speech/whisper-asr-finetune": {
                "sub_category": null,
                "repository_name": "whisper-asr-finetune",
                "user_name": "sarulab-speech",
                "description": "Finetuning Whisper ASR model"
            },
            "https://github.com/CjangCjengh/japanese_chatbot": {
                "sub_category": null,
                "repository_name": "japanese_chatbot",
                "user_name": "CjangCjengh",
                "description": "A PyTorch Implementation of japanese chatbot using BERT and Transformer's decoder"
            },
            "https://github.com/yamamaya/radicalchar": {
                "sub_category": null,
                "repository_name": "radicalchar",
                "user_name": "yamamaya",
                "description": "部首文字正規化ライブラリ"
            },
            "https://github.com/tokuhirom/akaza": {
                "sub_category": null,
                "repository_name": "akaza",
                "user_name": "tokuhirom",
                "description": "Yet another Japanese IME for IBus/Linux"
            },
            "https://github.com/polm/posuto": {
                "sub_category": null,
                "repository_name": "posuto",
                "user_name": "polm",
                "description": "Japanese postal code data."
            },
            "https://github.com/CjangCjengh/tacotron2-japanese": {
                "sub_category": null,
                "repository_name": "tacotron2-japanese",
                "user_name": "CjangCjengh",
                "description": "Tacotron2 implementation of Japanese"
            },
            "https://github.com/esrille/ibus-hiragana": {
                "sub_category": null,
                "repository_name": "ibus-hiragana",
                "user_name": "esrille",
                "description": "ひらがなIME for IBus"
            },
            "https://github.com/esrille/furiganapad": {
                "sub_category": null,
                "repository_name": "furiganapad",
                "user_name": "esrille",
                "description": "ふりがなパッド"
            },
            "https://github.com/WorksApplications/chikkarpy": {
                "sub_category": null,
                "repository_name": "chikkarpy",
                "user_name": "WorksApplications",
                "description": "Japanese synonym library"
            },
            "https://github.com/p-geon/ja-tokenizer-docker-py": {
                "sub_category": null,
                "repository_name": "ja-tokenizer-docker-py",
                "user_name": "p-geon",
                "description": "Mecab + NEologd + Docker + Python3"
            },
            "https://github.com/oshizo/JapaneseEmbeddingEval": {
                "sub_category": null,
                "repository_name": "JapaneseEmbeddingEval",
                "user_name": "oshizo",
                "description": "JapaneseEmbeddingEval"
            },
            "https://github.com/karakuri-ai/gptuber-by-langchain": {
                "sub_category": null,
                "repository_name": "gptuber-by-langchain",
                "user_name": "karakuri-ai",
                "description": "GPTがYouTuberをやります"
            },
            "https://github.com/google/shuwa": {
                "sub_category": null,
                "repository_name": "shuwa",
                "user_name": "google",
                "description": "Extend GNOME On-Screen Keyboard for Input Methods"
            },
            "https://github.com/CyberAgentAILab/japanese-nli-model": {
                "sub_category": null,
                "repository_name": "japanese-nli-model",
                "user_name": "CyberAgentAILab",
                "description": "This repository provides the code for Japanese NLI model, a fine-tuned masked language model."
            },
            "https://github.com/tos-kamiya/tra-fugu": {
                "sub_category": null,
                "repository_name": "tra-fugu",
                "user_name": "tos-kamiya",
                "description": "A tool for Japanese-English translation and English-Japanese translation by using FuguMT"
            },
            "https://github.com/s-taka/fugumt": {
                "sub_category": null,
                "repository_name": "fugumt",
                "user_name": "s-taka",
                "description": "ぷるーふおぶこんせぷと で公開した機械翻訳エンジンを利用する翻訳環境です。 フォームに入力された文字列の翻訳、PDFの翻訳が可能です。"
            },
            "https://github.com/keio-smilab23/JaSPICE": {
                "sub_category": null,
                "repository_name": "JaSPICE",
                "user_name": "keio-smilab23",
                "description": "JaSPICE: Automatic Evaluation Metric Using Predicate-Argument Structures for Image Captioning Models"
            },
            "https://github.com/yantaisa11/Retrieval-based-Voice-Conversion-WebUI-JP-localization": {
                "sub_category": null,
                "repository_name": "Retrieval-based-Voice-Conversion-WebUI-JP-localization",
                "user_name": "yantaisa11",
                "description": "jp-localization"
            },
            "https://github.com/r9y9/pyopenjtalk": {
                "sub_category": null,
                "repository_name": "pyopenjtalk",
                "user_name": "r9y9",
                "description": "Python wrapper for OpenJTalk"
            }
        },
        "C++": {
            "https://github.com/taku910/mecab": {
                "sub_category": "Morphology analysis",
                "repository_name": "mecab",
                "user_name": "taku910",
                "description": "Yet another Japanese morphological analyzer"
            },
            "https://github.com/ku-nlp/jumanpp": {
                "sub_category": "Morphology analysis",
                "repository_name": "jumanpp",
                "user_name": "ku-nlp",
                "description": "Juman++ (a Morphological Analyzer Toolkit)"
            },
            "https://github.com/neubig/kytea": {
                "sub_category": "Morphology analysis",
                "repository_name": "kytea",
                "user_name": "neubig",
                "description": "The Kyoto Text Analysis Toolkit for word segmentation and pronunciation estimation, etc."
            },
            "https://github.com/taku910/cabocha": {
                "sub_category": "Parsing",
                "repository_name": "cabocha",
                "user_name": "taku910",
                "description": "Yet Another Japanese Dependency Structure Analyzer"
            },
            "https://github.com/ku-nlp/knp": {
                "sub_category": "Parsing",
                "repository_name": "knp",
                "user_name": "ku-nlp",
                "description": "A Japanese Parser"
            },
            "https://github.com/yohokuno/jsc": {
                "sub_category": null,
                "repository_name": "jsc",
                "user_name": "yohokuno",
                "description": "Joint source channel model for Japanese Kana Kanji conversion, Chinese pinyin input and CJE mixed input."
            },
            "https://github.com/codefirst/aquaskk": {
                "sub_category": null,
                "repository_name": "aquaskk",
                "user_name": "codefirst",
                "description": "An input method without morphological analysis."
            },
            "https://github.com/google/mozc": {
                "sub_category": null,
                "repository_name": "mozc",
                "user_name": "google",
                "description": "a Japanese Input Method Editor designed for multi-platform"
            },
            "https://github.com/tuem/trimatch": {
                "sub_category": null,
                "repository_name": "trimatch",
                "user_name": "tuem",
                "description": "Trimatch: An (Exact|Prefix|Approximate) String Matching Library"
            },
            "https://github.com/tuem/resembla": {
                "sub_category": null,
                "repository_name": "resembla",
                "user_name": "tuem",
                "description": "Resembla: Word-based Japanese similar sentence search library"
            }
        },
        "Rust crate": {
            "https://github.com/lindera-morphology/lindera": {
                "sub_category": "Morphology analysis",
                "repository_name": "lindera",
                "user_name": "lindera-morphology",
                "description": "A morphological analysis library."
            },
            "https://github.com/daac-tools/vaporetto": {
                "sub_category": "Morphology analysis",
                "repository_name": "vaporetto",
                "user_name": "daac-tools",
                "description": "Vaporetto: Very Accelerated POintwise pREdicTion based TOkenizer"
            },
            "https://github.com/Leko/goya": {
                "sub_category": "Morphology analysis",
                "repository_name": "goya",
                "user_name": "Leko",
                "description": "Japanese Morphological Analysis written in Rust"
            },
            "https://github.com/daac-tools/vibrato": {
                "sub_category": "Morphology analysis",
                "repository_name": "vibrato",
                "user_name": "daac-tools",
                "description": "vibrato: Viterbi-based accelerated tokenizer"
            },
            "https://github.com/agatan/yoin": {
                "sub_category": "Morphology analysis",
                "repository_name": "yoin",
                "user_name": "agatan",
                "description": "A Japanese Morphological Analyzer written in pure Rust"
            },
            "https://github.com/tsurai/mecab-rs": {
                "sub_category": "Morphology analysis",
                "repository_name": "mecab-rs",
                "user_name": "tsurai",
                "description": "Safe Rust bindings for mecab a part-of-speech and morphological analyzer library"
            },
            "https://github.com/nakagami/awabi": {
                "sub_category": "Morphology analysis",
                "repository_name": "awabi",
                "user_name": "nakagami",
                "description": "A morphological analyzer using mecab dictionary"
            },
            "https://github.com/PSeitz/wana_kana_rust": {
                "sub_category": "Converter",
                "repository_name": "wana_kana_rust",
                "user_name": "PSeitz",
                "description": "and Romaji"
            },
            "https://github.com/gemmarx/unicode-jp-rs": {
                "sub_category": "Converter",
                "repository_name": "unicode-jp-rs",
                "user_name": "gemmarx",
                "description": "A Rust library to convert Japanese Half-width-kana[半角ｶﾅ] and Wide-alphanumeric[全角英数] into normal ones"
            },
            "https://github.com/gbrlsnchs/kana": {
                "sub_category": "Converter",
                "repository_name": "kana",
                "user_name": "gbrlsnchs",
                "description": "[Mirror] CLI program for transliterating romaji text to either hiragana or katakana"
            },
            "https://github.com/lindera-morphology/lindera-tantivy": {
                "sub_category": "Search engine library",
                "repository_name": "lindera-tantivy",
                "user_name": "lindera-morphology",
                "description": "Lindera tokenizer for Tantivy."
            },
            "https://github.com/akr4/tantivy-vibrato": {
                "sub_category": "Search engine library",
                "repository_name": "tantivy-vibrato",
                "user_name": "akr4",
                "description": "A Tantivy tokenizer using Vibrato."
            },
            "https://github.com/daac-tools/daachorse": {
                "sub_category": null,
                "repository_name": "daachorse",
                "user_name": "daac-tools",
                "description": "A fast implementation of the Aho-Corasick algorithm using the compact double-array data structure in Rust."
            },
            "https://github.com/legalforce-research/find-simdoc": {
                "sub_category": null,
                "repository_name": "find-simdoc",
                "user_name": "legalforce-research",
                "description": "Finding all pairs of similar documents time- and memory-efficiently"
            },
            "https://github.com/daac-tools/crawdad": {
                "sub_category": null,
                "repository_name": "crawdad",
                "user_name": "daac-tools",
                "description": "Rust library of natural language dictionaries using character-wise double-array tries."
            },
            "https://github.com/legalforce-research/tokenizer-speed-bench": {
                "sub_category": null,
                "repository_name": "tokenizer-speed-bench",
                "user_name": "legalforce-research",
                "description": "Comparison code of various tokenizers"
            },
            "https://github.com/legalforce-research/stringmatch-bench": {
                "sub_category": null,
                "repository_name": "stringmatch-bench",
                "user_name": "legalforce-research",
                "description": "Here provides benchmark tools to compare the performance of data structures for string matching."
            },
            "https://github.com/algon-320/vime": {
                "sub_category": null,
                "repository_name": "vime",
                "user_name": "algon-320",
                "description": "Using Vim as an input method for X11 apps"
            },
            "https://github.com/VOICEVOX/voicevox_core": {
                "sub_category": null,
                "repository_name": "voicevox_core",
                "user_name": "VOICEVOX",
                "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのコア"
            },
            "https://github.com/akaza-im/akaza": {
                "sub_category": null,
                "repository_name": "akaza",
                "user_name": "akaza-im",
                "description": "Yet another Japanese IME for IBus/Linux"
            },
            "https://github.com/WeDontPanic/Jotoba": {
                "sub_category": null,
                "repository_name": "Jotoba",
                "user_name": "WeDontPanic",
                "description": "A free online, self-hostable, multilang Japanese dictionary."
            },
            "https://github.com/shinespark/dvorakjp-romantable": {
                "sub_category": null,
                "repository_name": "dvorakjp-romantable",
                "user_name": "shinespark",
                "description": "Google 日本語入力用DvorakJPローマ字テーブル / DvorakJP Roman Table for Google Japanese Input"
            },
            "https://github.com/Netdex/niinii": {
                "sub_category": null,
                "repository_name": "niinii",
                "user_name": "Netdex",
                "description": "Japanese glossator for assisted reading of text using Ichiran"
            }
        },
        "JavaScript": {
            "https://github.com/takuyaa/kuromoji.js": {
                "sub_category": "Morphology analysis",
                "repository_name": "kuromoji.js",
                "user_name": "takuyaa",
                "description": "JavaScript implementation of Japanese morphological analyzer"
            },
            "https://github.com/rakuten-nlp/rakutenma": {
                "sub_category": "Morphology analysis",
                "repository_name": "rakutenma",
                "user_name": "rakuten-nlp",
                "description": "morphological analyzer (word segmentor + PoS Tagger) for Chinese and Japanese written purely in JavaScript."
            },
            "https://github.com/golbin/node-mecab-ya": {
                "sub_category": "Morphology analysis",
                "repository_name": "node-mecab-ya",
                "user_name": "golbin",
                "description": "Yet another mecab wrapper for nodejs"
            },
            "https://github.com/thammin/juman-bin": {
                "sub_category": "Morphology analysis",
                "repository_name": "juman-bin",
                "user_name": "thammin",
                "description": "a User-Extensible Morphological Analyzer for Japanese. 日本語形態素解析システム"
            },
            "https://github.com/hecomi/node-mecab-async": {
                "sub_category": "Morphology analysis",
                "repository_name": "node-mecab-async",
                "user_name": "hecomi",
                "description": "Asynchronous japanese morphological analyser using MeCab."
            },
            "https://github.com/hexenq/kuroshiro": {
                "sub_category": "Converter",
                "repository_name": "kuroshiro",
                "user_name": "hexenq",
                "description": "Japanese language library for converting Japanese sentence to Hiragana, Katakana or Romaji with furigana and okurigana modes supported."
            },
            "https://github.com/hexenq/kuroshiro-analyzer-kuromoji": {
                "sub_category": "Converter",
                "repository_name": "kuroshiro-analyzer-kuromoji",
                "user_name": "hexenq",
                "description": "Kuromoji morphological analyzer for kuroshiro."
            },
            "https://github.com/lovell/hepburn": {
                "sub_category": "Converter",
                "repository_name": "hepburn",
                "user_name": "lovell",
                "description": "Node.js module for converting Japanese Hiragana and Katakana script to, and from, Romaji using Hepburn romanisation"
            },
            "https://github.com/twada/japanese-numerals-to-number": {
                "sub_category": "Converter",
                "repository_name": "japanese-numerals-to-number",
                "user_name": "twada",
                "description": "Converts Japanese Numerals into number"
            },
            "https://github.com/kariminf/jslingua": {
                "sub_category": "Converter",
                "repository_name": "jslingua",
                "user_name": "kariminf",
                "description": "Javascript libraries to process text: Arabic, Japanese, etc."
            },
            "https://github.com/WaniKani/WanaKana": {
                "sub_category": "Converter",
                "repository_name": "WanaKana",
                "user_name": "WaniKani",
                "description": "Javascript library for detecting and transliterating Hiragana <--> Katakana <--> Romaji"
            },
            "https://github.com/jeresig/node-romaji-name": {
                "sub_category": "Converter",
                "repository_name": "node-romaji-name",
                "user_name": "jeresig",
                "description": "Normalize and fix common issues with Romaji-based Japanese names."
            },
            "https://github.com/hakatashi/kyujitai.js": {
                "sub_category": "Converter",
                "repository_name": "kyujitai.js",
                "user_name": "hakatashi",
                "description": "Utility collections for making Japanese text old-fashioned"
            },
            "https://github.com/bangumi-data/bangumi-data": {
                "sub_category": null,
                "repository_name": "bangumi-data",
                "user_name": "bangumi-data",
                "description": "Raw data for Japanese Anime"
            },
            "https://github.com/FooSoft/yomichan": {
                "sub_category": null,
                "repository_name": "yomichan",
                "user_name": "FooSoft",
                "description": "Japanese pop-up dictionary extension for Chrome and Firefox."
            },
            "https://github.com/gecko655/proofreading-tool": {
                "sub_category": null,
                "repository_name": "proofreading-tool",
                "user_name": "gecko655",
                "description": "GUIで動作する文書校正ツール GUI tool for textlinting."
            },
            "https://github.com/minosvasilias/kanjigrid": {
                "sub_category": null,
                "repository_name": "kanjigrid",
                "user_name": "minosvasilias",
                "description": "A web-app displaying the 2200 kanji characters taught in James Heisig's \"Remembering the Kanji\", 6th edition."
            },
            "https://github.com/echamudi/japanese-toolkit": {
                "sub_category": null,
                "repository_name": "japanese-toolkit",
                "user_name": "echamudi",
                "description": "Monorepo for Kanji, Furigana, Japanese DB, and others"
            },
            "https://github.com/textlint-ja/analyze-desumasu-dearu": {
                "sub_category": null,
                "repository_name": "analyze-desumasu-dearu",
                "user_name": "textlint-ja",
                "description": "文の敬体(ですます調)、常体(である調)を解析するJavaScriptライブラリ"
            },
            "https://github.com/DJTB/hatsuon": {
                "sub_category": null,
                "repository_name": "hatsuon",
                "user_name": "DJTB",
                "description": "Japanese pitch accent utils"
            },
            "https://github.com/otodn/sentiment_ja_js": {
                "sub_category": null,
                "repository_name": "sentiment_ja_js",
                "user_name": "otodn",
                "description": "Sentiment Analysis in Japanese. sentiment_ja with JavaScript"
            },
            "https://github.com/takuyaa/mecab-ipadic-seed": {
                "sub_category": null,
                "repository_name": "mecab-ipadic-seed",
                "user_name": "takuyaa",
                "description": "mecab-ipadic seed dictionary reader"
            },
            "https://github.com/LuanRT/Japanese-Word-Of-The-Day": {
                "sub_category": null,
                "repository_name": "Japanese-Word-Of-The-Day",
                "user_name": "LuanRT",
                "description": "Well, a different Japanese word everyday."
            },
            "https://github.com/esrille/oskim": {
                "sub_category": null,
                "repository_name": "oskim",
                "user_name": "esrille",
                "description": "Extend GNOME On-Screen Keyboard for Input Methods"
            },
            "https://github.com/wtnv-lab/tweetMapping": {
                "sub_category": null,
                "repository_name": "tweetMapping",
                "user_name": "wtnv-lab",
                "description": "東日本大震災発生から24時間以内につぶやかれたジオタグ付きツイートのデジタルアーカイブです。"
            }
        },
        "Go": {
            "https://github.com/ikawaha/kagome": {
                "sub_category": "Morphology analysis",
                "repository_name": "kagome",
                "user_name": "ikawaha",
                "description": "Self-contained Japanese Morphological Analyzer written in pure Go"
            },
            "https://github.com/jiro4989/ojosama": {
                "sub_category": null,
                "repository_name": "ojosama",
                "user_name": "jiro4989",
                "description": "テキストを壱百満天原サロメお嬢様風の口調に変換します"
            },
            "https://github.com/gojp/nihongo": {
                "sub_category": null,
                "repository_name": "nihongo",
                "user_name": "gojp",
                "description": "Japanese Dictionary"
            },
            "https://github.com/FooSoft/yomichan-import": {
                "sub_category": null,
                "repository_name": "yomichan-import",
                "user_name": "FooSoft",
                "description": "External dictionary importer for Yomichan."
            },
            "https://github.com/maruamyu/imas-ime-dic": {
                "sub_category": null,
                "repository_name": "imas-ime-dic",
                "user_name": "maruamyu",
                "description": "THE IDOLM@STER words dictionary for Japanese IME (by imas-db.jp)"
            },
            "https://github.com/ktnyt/go-moji": {
                "sub_category": null,
                "repository_name": "go-moji",
                "user_name": "ktnyt",
                "description": "A Go library for Zenkaku/Hankaku conversion"
            }
        },
        "Java": {
            "https://github.com/atilika/kuromoji": {
                "sub_category": "Morphology analysis",
                "repository_name": "kuromoji",
                "user_name": "atilika",
                "description": "Kuromoji is a self-contained and very easy to use Japanese morphological analyzer designed for search"
            },
            "https://github.com/WorksApplications/SudachiDict": {
                "sub_category": "Morphology analysis",
                "repository_name": "SudachiDict",
                "user_name": "WorksApplications",
                "description": "A lexicon for Sudachi"
            },
            "https://github.com/sakarika/kanjitomo-ocr": {
                "sub_category": null,
                "repository_name": "kanjitomo-ocr",
                "user_name": "sakarika",
                "description": "Java library for identifying Japanese characters from images"
            },
            "https://github.com/nicolas-raoul/jakaroma": {
                "sub_category": null,
                "repository_name": "jakaroma",
                "user_name": "nicolas-raoul",
                "description": "Java library and command-line tool to transliterate Japanese kanji to romaji (Latin alphabet)"
            },
            "https://github.com/nicolas-raoul/kakasi-java": {
                "sub_category": null,
                "repository_name": "kakasi-java",
                "user_name": "nicolas-raoul",
                "description": "Kanji transliteration to hiragana/katakana/romaji, in Java"
            },
            "https://github.com/fauu/Kamite": {
                "sub_category": null,
                "repository_name": "Kamite",
                "user_name": "fauu",
                "description": "A desktop language immersion companion for learners of Japanese"
            },
            "https://github.com/craftzdog/react-native-japanese-tokenizer": {
                "sub_category": null,
                "repository_name": "react-native-japanese-tokenizer",
                "user_name": "craftzdog",
                "description": "Async Japanese Tokenizer Native Plugin for React Native for iOS and Android"
            },
            "https://github.com/suguru/elasticsearch-analysis-japanese": {
                "sub_category": null,
                "repository_name": "elasticsearch-analysis-japanese",
                "user_name": "suguru",
                "description": "Japanese analyzer uses kuromoji japanese tokenizer for ElasticSearch"
            },
            "https://github.com/andree-surya/moji4j": {
                "sub_category": null,
                "repository_name": "moji4j",
                "user_name": "andree-surya",
                "description": "A Java library to converts between Japanese Hiragana, Katakana, and Romaji scripts."
            },
            "https://github.com/ikegami-yukino/neologdn-java": {
                "sub_category": null,
                "repository_name": "neologdn-java",
                "user_name": "ikegami-yukino",
                "description": "Japanese text normalizer for mecab-neologd"
            }
        },
        "Pretrained model": {
            "https://github.com/philipperemy/japanese-words-to-vectors": {
                "sub_category": "Word2Vec",
                "repository_name": "japanese-words-to-vectors",
                "user_name": "philipperemy",
                "description": "Word2vec (word to vectors) approach for Japanese language using Gensim and Mecab."
            },
            "https://github.com/WorksApplications/chiVe": {
                "sub_category": "Word2Vec",
                "repository_name": "chiVe",
                "user_name": "WorksApplications",
                "description": "Japanese word embedding with Sudachi and NWJC"
            },
            "https://github.com/cl-tohoku/elmo-japanese": {
                "sub_category": "Word2Vec",
                "repository_name": "elmo-japanese",
                "user_name": "cl-tohoku",
                "description": "elmo-japanese"
            },
            "https://github.com/yagays/embedrank": {
                "sub_category": "Word2Vec",
                "repository_name": "embedrank",
                "user_name": "yagays",
                "description": "Python Implementation of EmbedRank"
            },
            "https://github.com/eggplants/aovec": {
                "sub_category": "Word2Vec",
                "repository_name": "aovec",
                "user_name": "eggplants",
                "description": "青空文庫全書籍のWord2Vecビルダー+構築済みモデル"
            },
            "https://github.com/lapras-inc/dependency-based-japanese-word-embeddings": {
                "sub_category": "Word2Vec",
                "repository_name": "dependency-based-japanese-word-embeddings",
                "user_name": "lapras-inc",
                "description": "This is a repository for the AI LAB article \"係り受けに基づく日本語単語埋込 (Dependency-based Japanese Word Embeddings)\" ( Article URL https://ai-lab.lapras.com/nlp/japanese-word-embedding/)"
            },
            "https://github.com/wikiwikification/jawikivec": {
                "sub_category": "Word2Vec",
                "repository_name": "jawikivec",
                "user_name": "wikiwikification",
                "description": "Yet Another Japanese-Wikipedia Entity Vectors"
            },
            "https://github.com/kamigaito/jawiki_word_vector_updater": {
                "sub_category": "Word2Vec",
                "repository_name": "jawiki_word_vector_updater",
                "user_name": "kamigaito",
                "description": "最新の日本語Wikipediaのダンプデータから，MeCabを用いてIPA辞書と最新のNeologd辞書の両方で形態素解析を実施し，その結果に基づいた word2vec，fastText，GloVeの単語分散表現を学習するためのスクリプト"
            },
            "https://github.com/cl-tohoku/bert-japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "bert-japanese",
                "user_name": "cl-tohoku",
                "description": "BERT models for Japanese text."
            },
            "https://github.com/rinnakk/japanese-pretrained-models": {
                "sub_category": "Transformer based models",
                "repository_name": "japanese-pretrained-models",
                "user_name": "rinnakk",
                "description": "Code for producing Japanese pretrained models provided by rinna Co., Ltd."
            },
            "https://github.com/yoheikikuta/bert-japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "bert-japanese",
                "user_name": "yoheikikuta",
                "description": "BERT with SentencePiece for Japanese text."
            },
            "https://github.com/WorksApplications/SudachiTra": {
                "sub_category": "Transformer based models",
                "repository_name": "SudachiTra",
                "user_name": "WorksApplications",
                "description": "Japanese tokenizer for Transformers"
            },
            "https://github.com/nttcslab/japanese-dialog-transformers": {
                "sub_category": "Transformer based models",
                "repository_name": "japanese-dialog-transformers",
                "user_name": "nttcslab",
                "description": "Code for evaluating Japanese pretrained models provided by NTT Ltd."
            },
            "https://github.com/octanove/shiba": {
                "sub_category": "Transformer based models",
                "repository_name": "shiba",
                "user_name": "octanove",
                "description": "Pytorch implementation and pre-trained Japanese model for CANINE, the efficient character-level transformer."
            },
            "https://github.com/reppy4620/Dialog": {
                "sub_category": "Transformer based models",
                "repository_name": "Dialog",
                "user_name": "reppy4620",
                "description": "A PyTorch Implementation of japanese chatbot using BERT and Transformer's decoder"
            },
            "https://github.com/retarfi/language-pretraining": {
                "sub_category": "Transformer based models",
                "repository_name": "language-pretraining",
                "user_name": "retarfi",
                "description": "BERT and ELECTRA models of PyTorch implementations for Japanese text."
            },
            "https://github.com/ou-medinfo/medbertjp": {
                "sub_category": "Transformer based models",
                "repository_name": "medbertjp",
                "user_name": "ou-medinfo",
                "description": "Trials of pre-trained BERT models for the medical domain in Japanese."
            },
            "https://github.com/cl-tohoku/ILYS-aoba-chatbot": {
                "sub_category": "Transformer based models",
                "repository_name": "ILYS-aoba-chatbot",
                "user_name": "cl-tohoku",
                "description": "ILYS-aoba-chatbot"
            },
            "https://github.com/megagonlabs/t5-japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "t5-japanese",
                "user_name": "megagonlabs",
                "description": "Codes to pre-train Japanese T5 models"
            },
            "https://github.com/yagays/pytorch_bert_japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "pytorch_bert_japanese",
                "user_name": "yagays",
                "description": "PytorchでBERTの日本語学習済みモデルを利用する"
            },
            "https://github.com/laboroai/Laboro-BERT-Japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "Laboro-BERT-Japanese",
                "user_name": "laboroai",
                "description": "Laboro BERT Japanese: Japanese BERT Pre-Trained With Web-Corpus"
            },
            "https://github.com/tanreinama/RoBERTa-japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "RoBERTa-japanese",
                "user_name": "tanreinama",
                "description": "Japanese BERT Pretrained Model"
            },
            "https://github.com/tanreinama/aMLP-japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "aMLP-japanese",
                "user_name": "tanreinama",
                "description": "aMLP Transformer Model for Japanese"
            },
            "https://github.com/akirakubo/bert-japanese-aozora": {
                "sub_category": "Transformer based models",
                "repository_name": "bert-japanese-aozora",
                "user_name": "akirakubo",
                "description": "Japanese BERT trained on Aozora Bunko and Wikipedia, pre-tokenized by MeCab with UniDic & SudachiPy"
            },
            "https://github.com/colorfulscoop/sbert-ja": {
                "sub_category": "Transformer based models",
                "repository_name": "sbert-ja",
                "user_name": "colorfulscoop",
                "description": "Code to train Sentence BERT Japanese model for Hugging Face Model Hub"
            },
            "https://github.com/PatrickJohnRamos/BERT-Japan-vaccination": {
                "sub_category": "Transformer based models",
                "repository_name": "BERT-Japan-vaccination",
                "user_name": "PatrickJohnRamos",
                "description": "Official fine-tuning code for \"Emotion Analysis of Japanese Tweets and Comparison to Vaccinations in Japan\""
            },
            "https://github.com/tanreinama/gpt2-japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "gpt2-japanese",
                "user_name": "tanreinama",
                "description": "Japanese GPT2 Generation Model"
            },
            "https://github.com/tanreinama/text2text-japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "text2text-japanese",
                "user_name": "tanreinama",
                "description": "gpt-2 based text2text conversion model"
            },
            "https://github.com/colorfulscoop/gpt-ja": {
                "sub_category": "Transformer based models",
                "repository_name": "gpt-ja",
                "user_name": "colorfulscoop",
                "description": "GPT-2 Japanese model for HuggingFace's transformers"
            },
            "https://github.com/astremo/friendly_JA-Model": {
                "sub_category": "Transformer based models",
                "repository_name": "friendly_JA-Model",
                "user_name": "astremo",
                "description": "MT model trained using the friendly_JA Corpus attempting to make Japanese easier/more accessible to occidental people by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon"
            },
            "https://github.com/alinear-corp/albert-japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "albert-japanese",
                "user_name": "alinear-corp",
                "description": "BERT with SentencePiece for Japanese text."
            },
            "https://github.com/Kosuke-Szk/ja_text_bert": {
                "sub_category": "Transformer based models",
                "repository_name": "ja_text_bert",
                "user_name": "Kosuke-Szk",
                "description": "日本語WikipediaコーパスでBERTのPre-Trainedモデルを生成するためのリポジトリ"
            },
            "https://github.com/BandaiNamcoResearchInc/DistilBERT-base-jp": {
                "sub_category": "Transformer based models",
                "repository_name": "DistilBERT-base-jp",
                "user_name": "BandaiNamcoResearchInc",
                "description": "A Japanese DistilBERT pretrained model, which was trained on Wikipedia."
            },
            "https://github.com/informatix-inc/bert": {
                "sub_category": "Transformer based models",
                "repository_name": "bert",
                "user_name": "informatix-inc",
                "description": "This repository provides snippets to use RoBERTa pre-trained on Japanese corpus. Our dataset consists of Japanese Wikipedia and web-scrolled articles, 25GB in total. The released model is built based on that from HuggingFace."
            },
            "https://github.com/laboroai/Laboro-DistilBERT-Japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "Laboro-DistilBERT-Japanese",
                "user_name": "laboroai",
                "description": "Laboro DistilBERT Japanese"
            },
            "https://github.com/studio-ousia/luke": {
                "sub_category": "Transformer based models",
                "repository_name": "luke",
                "user_name": "studio-ousia",
                "description": "- Language Understanding with Knowledge-based Embeddings"
            },
            "https://github.com/tanreinama/GPTSAN": {
                "sub_category": "Transformer based models",
                "repository_name": "GPTSAN",
                "user_name": "tanreinama",
                "description": "General-purpose Swich transformer based Japanese language mode"
            },
            "https://github.com/rinnakk/japanese-clip": {
                "sub_category": "Transformer based models",
                "repository_name": "japanese-clip",
                "user_name": "rinnakk",
                "description": "Japanese CLIP by rinna Co., Ltd."
            },
            "https://github.com/EhimeNLP/AcademicBART": {
                "sub_category": "Transformer based models",
                "repository_name": "AcademicBART",
                "user_name": "EhimeNLP",
                "description": "We pretrained a BART-based Japanese masked language model on paper abstracts from the academic database CiNii Articles"
            },
            "https://github.com/EhimeNLP/AcademicRoBERTa": {
                "sub_category": "Transformer based models",
                "repository_name": "AcademicRoBERTa",
                "user_name": "EhimeNLP",
                "description": "We pretrained a RoBERTa-based Japanese masked language model on paper abstracts from the academic database CiNii Articles."
            },
            "https://github.com/line/LINE-DistilBERT-Japanese": {
                "sub_category": "Transformer based models",
                "repository_name": "LINE-DistilBERT-Japanese",
                "user_name": "line",
                "description": "DistilBERT model pre-trained on 131 GB of Japanese web text. The teacher model is BERT-base that built in-house at LINE."
            },
            "https://github.com/kunishou/Japanese-Alpaca-LoRA": {
                "sub_category": "Transformer based models",
                "repository_name": "Japanese-Alpaca-LoRA",
                "user_name": "kunishou",
                "description": "日本語に翻訳したStanford Alpacaのデータセットを用いてLLaMAをファインチューニングし作成したLow-Rank AdapterのリンクとGenerateサンプルコード"
            }
        },
        "ChatGPT": {
            "https://github.com/Yuchi-Games/VRChatGPT": {
                "sub_category": null,
                "repository_name": "VRChatGPT",
                "user_name": "Yuchi-Games",
                "description": "ChatGPTを使ってVRChat上でお喋り出来るようにするプログラム。"
            },
            "https://github.com/M-gen/AITuberDegikkoMirii": {
                "sub_category": null,
                "repository_name": "AITuberDegikkoMirii",
                "user_name": "M-gen",
                "description": "AITuberの基礎となる部分を開発しています"
            },
            "https://github.com/hirokidaichi/wanna": {
                "sub_category": null,
                "repository_name": "wanna",
                "user_name": "hirokidaichi",
                "description": "Shell command launcher with natural language"
            },
            "https://github.com/uezo/ChatdollKit": {
                "sub_category": null,
                "repository_name": "ChatdollKit",
                "user_name": "uezo",
                "description": "ChatdollKit enables you to make your 3D model into a chatbot"
            },
            "https://github.com/gyokuro33/ChuanhuChatGPTJapanese": {
                "sub_category": null,
                "repository_name": "ChuanhuChatGPTJapanese",
                "user_name": "gyokuro33",
                "description": "GUI for ChatGPT API For Japanese"
            },
            "https://github.com/manju-summoner/AISisterAIChan": {
                "sub_category": null,
                "repository_name": "AISisterAIChan",
                "user_name": "manju-summoner",
                "description": "ChatGPT3.5を搭載した伺かゴースト「AI妹アイちゃん」です。利用には別途ChatGPTのAPIキーが必要です。"
            },
            "https://github.com/Geson-anko/vrchatbot": {
                "sub_category": null,
                "repository_name": "vrchatbot",
                "user_name": "Geson-anko",
                "description": "VRChatにAI Botを作るためのリポジトリ"
            },
            "https://github.com/karakuri-ai/gptuber-by-langchain": {
                "sub_category": null,
                "repository_name": "gptuber-by-langchain",
                "user_name": "karakuri-ai",
                "description": "GPTがYouTuberをやります"
            },
            "https://github.com/supershaneski/openai-chatfriend": {
                "sub_category": null,
                "repository_name": "openai-chatfriend",
                "user_name": "supershaneski",
                "description": "A chatbox application built using Nuxt 3 powered by Open AI Text completion endpoint. You can select different personality of your AI friend. The default will respond in Japanese. You can use this app to practice your Nihongo skills!"
            },
            "https://github.com/franzwong/chrome-ext-translate-to-hiragana-with-chatgpt": {
                "sub_category": null,
                "repository_name": "chrome-ext-translate-to-hiragana-with-chatgpt",
                "user_name": "franzwong",
                "description": "This Chrome extension can translate selected Japanese text to Hiragana by using ChatGPT."
            },
            "https://github.com/nohanaga/azure-search-openai-demo": {
                "sub_category": null,
                "repository_name": "azure-search-openai-demo",
                "user_name": "nohanaga",
                "description": "このサンプルでは、Retrieval Augmented Generation パターンを使用して、独自のデータに対してChatGPT のような体験を作成するためのいくつかのアプローチを示しています。"
            },
            "https://github.com/pixiv/chatvrm": {
                "sub_category": null,
                "repository_name": "chatvrm",
                "user_name": "pixiv",
                "description": "ChatVRMはブラウザで簡単に3Dキャラクターと会話ができるデモアプリケーションです。"
            }
        },
        "Dictionary": {
            "https://github.com/neologd/mecab-ipadic-neologd": {
                "sub_category": null,
                "repository_name": "mecab-ipadic-neologd",
                "user_name": "neologd",
                "description": "Neologism dictionary based on the language resources on the Web for mecab-ipadic"
            },
            "https://github.com/PKSHATechnology-Research/tdmelodic": {
                "sub_category": null,
                "repository_name": "tdmelodic",
                "user_name": "PKSHATechnology-Research",
                "description": "A Japanese accent dictionary generator"
            },
            "https://github.com/neocl/jamdict": {
                "sub_category": null,
                "repository_name": "jamdict",
                "user_name": "neocl",
                "description": "Python 3 library for manipulating Jim Breen's JMdict, KanjiDic2, JMnedict and kanji-radical mappings"
            },
            "https://github.com/polm/unidic-py": {
                "sub_category": null,
                "repository_name": "unidic-py",
                "user_name": "polm",
                "description": "Unidic packaged for installation via pip."
            },
            "https://github.com/chakki-works/Japanese-Company-Lexicon": {
                "sub_category": null,
                "repository_name": "Japanese-Company-Lexicon",
                "user_name": "chakki-works",
                "description": "Japanese Company Lexicon (JCLdic)"
            },
            "https://github.com/yagays/manbyo-sudachi": {
                "sub_category": null,
                "repository_name": "manbyo-sudachi",
                "user_name": "yagays",
                "description": "Sudachi向け万病辞書"
            },
            "https://github.com/tokuhirom/jawiki-kana-kanji-dict": {
                "sub_category": null,
                "repository_name": "jawiki-kana-kanji-dict",
                "user_name": "tokuhirom",
                "description": "Generate SKK/MeCab dictionary from Wikipedia(Japanese edition)"
            },
            "https://github.com/sociocom/JIWC-Dictionary": {
                "sub_category": null,
                "repository_name": "JIWC-Dictionary",
                "user_name": "sociocom",
                "description": "dictionary to find emotion related to text"
            },
            "https://github.com/ku-nlp/JumanDIC": {
                "sub_category": null,
                "repository_name": "JumanDIC",
                "user_name": "ku-nlp",
                "description": "This repository contains source dictionary files to build dictionaries for JUMAN and Juman++."
            },
            "https://github.com/polm/ipadic-py": {
                "sub_category": null,
                "repository_name": "ipadic-py",
                "user_name": "polm",
                "description": "IPAdic packaged for easy use from Python."
            },
            "https://github.com/polm/unidic-lite": {
                "sub_category": null,
                "repository_name": "unidic-lite",
                "user_name": "polm",
                "description": "A small version of UniDic for easy pip installs."
            },
            "https://github.com/peaceiris/emoji-ime-dictionary": {
                "sub_category": null,
                "repository_name": "emoji-ime-dictionary",
                "user_name": "peaceiris",
                "description": "日本語で絵文字入力をするための IME 追加辞書 orange_book Google 日本語入力などで日本語から絵文字への変換を可能にする IME 拡張辞書"
            },
            "https://github.com/peaceiris/google-ime-dictionary": {
                "sub_category": null,
                "repository_name": "google-ime-dictionary",
                "user_name": "peaceiris",
                "description": "日英変換・英語略語展開のための IME 追加辞書 orange_book 日本語から英語への和英変換や英語略語の展開を Google 日本語入力や ATOK などで可能にする IME 拡張辞書"
            },
            "https://github.com/ncaq/dic-nico-intersection-pixiv": {
                "sub_category": null,
                "repository_name": "dic-nico-intersection-pixiv",
                "user_name": "ncaq",
                "description": "ニコニコ大百科とピクシブ百科事典の共通部分のIME辞書"
            },
            "https://github.com/KEINOS/google-ime-user-dictionary-ja-en": {
                "sub_category": null,
                "repository_name": "google-ime-user-dictionary-ja-en",
                "user_name": "KEINOS",
                "description": "GoogleIME用カタカナ語辞書プロジェクトのアーカイブです。Project archive of Google IME user dictionary from Katakana word ( Japanese loanword ) to English."
            },
            "https://github.com/tiwanari/emoticon": {
                "sub_category": null,
                "repository_name": "emoticon",
                "user_name": "tiwanari",
                "description": "Google日本語入力の顔文字辞書∩(,,Ò‿Ó,,)∩"
            },
            "https://github.com/akirakubo/mecab-mozcdic": {
                "sub_category": null,
                "repository_name": "mecab-mozcdic",
                "user_name": "akirakubo",
                "description": "open source mozc dictionaryをMeCab辞書のフォーマットに変換したものです。"
            },
            "https://github.com/albno273/denonbu-ime-dic": {
                "sub_category": null,
                "repository_name": "denonbu-ime-dic",
                "user_name": "albno273",
                "description": "電音IME: Microsoft IMEなどで利用することを想定した「電音部」関連用語の辞書"
            },
            "https://github.com/Umichang/nijisanji-ime-dic": {
                "sub_category": null,
                "repository_name": "nijisanji-ime-dic",
                "user_name": "Umichang",
                "description": "Microsoft IMEなどで利用することを想定した「にじさんじ」関連用語の用語辞書です。"
            },
            "https://github.com/Umichang/pokemon-ime-dic": {
                "sub_category": null,
                "repository_name": "pokemon-ime-dic",
                "user_name": "Umichang",
                "description": "Microsoft IMEなどで利用することを想定した、現状判明している全てのポケモンの名前を網羅した用語辞書です。"
            },
            "https://github.com/kujirahand/EJDict": {
                "sub_category": null,
                "repository_name": "EJDict",
                "user_name": "kujirahand",
                "description": "English-Japanese Dictionary data (Public Domain) EJDict-hand"
            },
            "https://github.com/Rinrin0413/Ayashiy-Nipongo-Dic": {
                "sub_category": null,
                "repository_name": "Ayashiy-Nipongo-Dic",
                "user_name": "Rinrin0413",
                "description": "贵樣ばこゐ辞畫を使て正レい日本语を使ラことが出來ゑ。"
            },
            "https://github.com/kotofurumiya/genshin-dict": {
                "sub_category": null,
                "repository_name": "genshin-dict",
                "user_name": "kotofurumiya",
                "description": "Windows/macOSで使える原神の単語辞書です"
            },
            "https://github.com/scriptin/jmdict-simplified": {
                "sub_category": null,
                "repository_name": "jmdict-simplified",
                "user_name": "scriptin",
                "description": "JMdict and JMnedict in JSON format"
            },
            "https://github.com/reasonset/mozcdict-ext": {
                "sub_category": null,
                "repository_name": "mozcdict-ext",
                "user_name": "reasonset",
                "description": "Convert external words into Mozc system dictionary"
            }
        },
        "Corpus": {
            "https://github.com/stockmarkteam/ner-wikipedia-dataset": {
                "sub_category": "Part-of-speech tagging / Named entity recognition",
                "repository_name": "ner-wikipedia-dataset",
                "user_name": "stockmarkteam",
                "description": "Wikipediaを用いた日本語の固有表現抽出データセット"
            },
            "https://github.com/Hironsan/IOB2Corpus": {
                "sub_category": "Part-of-speech tagging / Named entity recognition",
                "repository_name": "IOB2Corpus",
                "user_name": "Hironsan",
                "description": "Japanese IOB2 tagged corpus for Named Entity Recognition."
            },
            "https://github.com/tmu-nlp/TwitterCorpus": {
                "sub_category": "Part-of-speech tagging / Named entity recognition",
                "repository_name": "TwitterCorpus",
                "user_name": "tmu-nlp",
                "description": "首都大日本語 Twitter コーパス"
            },
            "https://github.com/megagonlabs/UD_Japanese-PUD": {
                "sub_category": "Part-of-speech tagging / Named entity recognition",
                "repository_name": "UD_Japanese-PUD",
                "user_name": "megagonlabs",
                "description": "Parallel Universal Dependencies."
            },
            "https://github.com/megagonlabs/UD_Japanese-GSD": {
                "sub_category": "Part-of-speech tagging / Named entity recognition",
                "repository_name": "UD_Japanese-GSD",
                "user_name": "megagonlabs",
                "description": "Japanese data from the Google UDT 2.0."
            },
            "https://github.com/ku-nlp/KWDLC": {
                "sub_category": "Part-of-speech tagging / Named entity recognition",
                "repository_name": "KWDLC",
                "user_name": "ku-nlp",
                "description": "Kyoto University Web Document Leads Corpus"
            },
            "https://github.com/ku-nlp/AnnotatedFKCCorpus": {
                "sub_category": "Part-of-speech tagging / Named entity recognition",
                "repository_name": "AnnotatedFKCCorpus",
                "user_name": "ku-nlp",
                "description": "Annotated Fuman Kaitori Center Corpus"
            },
            "https://github.com/odashi/small_parallel_enja": {
                "sub_category": "Parallel corpus",
                "repository_name": "small_parallel_enja",
                "user_name": "odashi",
                "description": "50k English-Japanese Parallel Corpus for Machine Translation Benchmark."
            },
            "https://github.com/zhang-jinyi/Web-Crawled-Corpus-for-Japanese-Chinese-NMT": {
                "sub_category": "Parallel corpus",
                "repository_name": "Web-Crawled-Corpus-for-Japanese-Chinese-NMT",
                "user_name": "zhang-jinyi",
                "description": "A Web Crawled Corpus for Japanese-Chinese NMT"
            },
            "https://github.com/shyyhs/CourseraParallelCorpusMining": {
                "sub_category": "Parallel corpus",
                "repository_name": "CourseraParallelCorpusMining",
                "user_name": "shyyhs",
                "description": "Coursera Corpus Mining and Multistage Fine-Tuning for Improving Lectures Translation"
            },
            "https://github.com/rpryzant/JESC": {
                "sub_category": "Parallel corpus",
                "repository_name": "JESC",
                "user_name": "rpryzant",
                "description": "A large parallel corpus of English and Japanese"
            },
            "https://github.com/tsuruoka-lab/AMI-Meeting-Parallel-Corpus": {
                "sub_category": "Parallel corpus",
                "repository_name": "AMI-Meeting-Parallel-Corpus",
                "user_name": "tsuruoka-lab",
                "description": "AMI Meeting Parallel Corpus"
            },
            "https://github.com/DayuanJiang/giant_ja-en_parallel_corpus": {
                "sub_category": "Parallel corpus",
                "repository_name": "giant_ja-en_parallel_corpus",
                "user_name": "DayuanJiang",
                "description": "This directory includes a giant Japanese-English subtitle corpus. The raw data comes from the Stanford’s JESC project."
            },
            "https://github.com/yusugomori/jesc_small": {
                "sub_category": "Parallel corpus",
                "repository_name": "jesc_small",
                "user_name": "yusugomori",
                "description": "Small Japanese-English Subtitle Corpus"
            },
            "https://github.com/marmooo/graded-enja-corpus": {
                "sub_category": "Parallel corpus",
                "repository_name": "graded-enja-corpus",
                "user_name": "marmooo",
                "description": "禁止用語や単語レベルを考慮した日英対訳コーパスです。"
            },
            "https://github.com/dahlia/cjk-compsci-terms": {
                "sub_category": "Parallel corpus",
                "repository_name": "cjk-compsci-terms",
                "user_name": "dahlia",
                "description": "CJK computer science terms comparison / 中日韓電腦科學術語對照 / 日中韓のコンピュータ科学の用語対照 / 한·중·일 전산학 용어 대조"
            },
            "https://github.com/laboroai/Laboro-ParaCorpus": {
                "sub_category": "Parallel corpus",
                "repository_name": "Laboro-ParaCorpus",
                "user_name": "laboroai",
                "description": "Scripts for creating a Japanese-English parallel corpus and training NMT models"
            },
            "https://github.com/Tzawa/google-vs-deepl-je": {
                "sub_category": "Parallel corpus",
                "repository_name": "google-vs-deepl-je",
                "user_name": "Tzawa",
                "description": "google-vs-deepl-je"
            },
            "https://github.com/ku-nlp/JMRD": {
                "sub_category": "Dialog corpus",
                "repository_name": "JMRD",
                "user_name": "ku-nlp",
                "description": "Japanese Movie Recommendation Dialogue dataset"
            },
            "https://github.com/1never/open2ch-dialogue-corpus": {
                "sub_category": "Dialog corpus",
                "repository_name": "open2ch-dialogue-corpus",
                "user_name": "1never",
                "description": "おーぷん2ちゃんねるをクロールして作成した対話コーパス"
            },
            "https://github.com/tsuruoka-lab/BSD": {
                "sub_category": "Dialog corpus",
                "repository_name": "BSD",
                "user_name": "tsuruoka-lab",
                "description": "The Business Scene Dialogue corpus"
            },
            "https://github.com/megagonlabs/asdc": {
                "sub_category": "Dialog corpus",
                "repository_name": "asdc",
                "user_name": "megagonlabs",
                "description": "Accommodation Search Dialog Corpus (宿泊施設探索対話コーパス)"
            },
            "https://github.com/MokkeMeguru/japanese-corpus": {
                "sub_category": "Dialog corpus",
                "repository_name": "japanese-corpus",
                "user_name": "MokkeMeguru",
                "description": "日本語の対話データ for seq2seq etc"
            },
            "https://github.com/cl-tohoku/BPersona-chat": {
                "sub_category": "Dialog corpus",
                "repository_name": "BPersona-chat",
                "user_name": "cl-tohoku",
                "description": "This repository contains the Japanese–English bilingual chat corpus BPersona-chat published in the paper Chat Translation Error Detection for Assisting Cross-lingual Communications at AACL-IJCNLP 2022's Workshop Eval4NLP 2022."
            },
            "https://github.com/jqk09a/japanese-daily-dialogue": {
                "sub_category": "Dialog corpus",
                "repository_name": "japanese-daily-dialogue",
                "user_name": "jqk09a",
                "description": "Japanese Daily Dialogue, or 日本語日常対話コーパス in Japanese, is a high-quality multi-turn dialogue dataset containing daily conversations on five topics: dailylife, school, travel, health, and entertainment."
            },
            "https://github.com/megagonlabs/jrte-corpus": {
                "sub_category": null,
                "repository_name": "jrte-corpus",
                "user_name": "megagonlabs",
                "description": "Japanese Realistic Textual Entailment Corpus (NLP 2020, LREC 2020)"
            },
            "https://github.com/davidluzgouveia/kanji-data": {
                "sub_category": null,
                "repository_name": "kanji-data",
                "user_name": "davidluzgouveia",
                "description": "A JSON kanji dataset with updated JLPT levels and WaniKani information"
            },
            "https://github.com/tmu-nlp/JapaneseWordSimilarityDataset": {
                "sub_category": null,
                "repository_name": "JapaneseWordSimilarityDataset",
                "user_name": "tmu-nlp",
                "description": "Japanese Word Similarity Dataset"
            },
            "https://github.com/tmu-nlp/simple-jppdb": {
                "sub_category": null,
                "repository_name": "simple-jppdb",
                "user_name": "tmu-nlp",
                "description": "A paraphrase database for Japanese text simplification"
            },
            "https://github.com/chakki-works/chABSA-dataset": {
                "sub_category": null,
                "repository_name": "chABSA-dataset",
                "user_name": "chakki-works",
                "description": "chakki's Aspect-Based Sentiment Analysis dataset"
            },
            "https://github.com/SkelterLabsInc/JaQuAD": {
                "sub_category": null,
                "repository_name": "JaQuAD",
                "user_name": "SkelterLabsInc",
                "description": "JaQuAD: Japanese Question Answering Dataset for Machine Reading Comprehension (2022, Skelter Labs)"
            },
            "https://github.com/verypluming/JaNLI": {
                "sub_category": null,
                "repository_name": "JaNLI",
                "user_name": "verypluming",
                "description": "Japanese Adversarial Natural Language Inference Dataset"
            },
            "https://github.com/megagonlabs/ebe-dataset": {
                "sub_category": null,
                "repository_name": "ebe-dataset",
                "user_name": "megagonlabs",
                "description": "Evidence-based Explanation Dataset (AACL-IJCNLP 2020)"
            },
            "https://github.com/yagays/emoji-ja": {
                "sub_category": null,
                "repository_name": "emoji-ja",
                "user_name": "yagays",
                "description": "UNICODE絵文字の日本語読み/キーワード/分類辞書"
            },
            "https://github.com/yagays/nayose-wikipedia-ja": {
                "sub_category": null,
                "repository_name": "nayose-wikipedia-ja",
                "user_name": "yagays",
                "description": "Wikipediaから作成した日本語名寄せデータセット"
            },
            "https://github.com/Hironsan/ja.text8": {
                "sub_category": null,
                "repository_name": "ja.text8",
                "user_name": "Hironsan",
                "description": "Japanese text8 corpus for word embedding."
            },
            "https://github.com/KodairaTomonori/ThreeLineSummaryDataset": {
                "sub_category": null,
                "repository_name": "ThreeLineSummaryDataset",
                "user_name": "KodairaTomonori",
                "description": "3行要約データセット"
            },
            "https://github.com/hingston/japanese": {
                "sub_category": null,
                "repository_name": "japanese",
                "user_name": "hingston",
                "description": "This repo contains a list of the 44,998 most common Japanese words in order of frequency, as determined by the University of Leeds Corpus."
            },
            "https://github.com/scriptin/kanji-frequency": {
                "sub_category": null,
                "repository_name": "kanji-frequency",
                "user_name": "scriptin",
                "description": "Kanji usage frequency data collected from various sources"
            },
            "https://github.com/laboroai/TEDxJP-10K": {
                "sub_category": null,
                "repository_name": "TEDxJP-10K",
                "user_name": "laboroai",
                "description": "TEDxJP-10K ASR Evaluation Dataset"
            },
            "https://github.com/chakki-works/CoARiJ": {
                "sub_category": null,
                "repository_name": "CoARiJ",
                "user_name": "chakki-works",
                "description": "Corpus of Annual Reports in Japan"
            },
            "https://github.com/textlint-ja/technological-book-corpus-ja": {
                "sub_category": null,
                "repository_name": "technological-book-corpus-ja",
                "user_name": "textlint-ja",
                "description": "日本語で書かれた技術書を収集した生コーパス/ツール"
            },
            "https://github.com/shirayu/ita-corpus-chuwa": {
                "sub_category": null,
                "repository_name": "ita-corpus-chuwa",
                "user_name": "shirayu",
                "description": "Chunked word annotation for ITA corpus"
            },
            "https://github.com/singletongue/wikipedia-utils": {
                "sub_category": null,
                "repository_name": "wikipedia-utils",
                "user_name": "singletongue",
                "description": "Utility scripts for preprocessing Wikipedia texts for NLP"
            },
            "https://github.com/MosasoM/inappropriate-words-ja": {
                "sub_category": null,
                "repository_name": "inappropriate-words-ja",
                "user_name": "MosasoM",
                "description": "日本語における不適切表現を収集します。自然言語処理の時のデータクリーニング用等に使えると思います。"
            },
            "https://github.com/smartnews-smri/house-of-councillors": {
                "sub_category": null,
                "repository_name": "house-of-councillors",
                "user_name": "smartnews-smri",
                "description": "参議院の公式ウェブサイトから会派、議員、議案、質問主意書のデータを整理しました。"
            },
            "https://github.com/smartnews-smri/house-of-representatives": {
                "sub_category": null,
                "repository_name": "house-of-representatives",
                "user_name": "smartnews-smri",
                "description": "国会議案データベース：衆議院"
            },
            "https://github.com/STAIR-Lab-CIT/STAIR-captions": {
                "sub_category": null,
                "repository_name": "STAIR-captions",
                "user_name": "STAIR-Lab-CIT",
                "description": "STAIR captions: large-scale Japanese image caption dataset"
            },
            "https://github.com/ku-nlp/Winograd-Schema-Challenge-Ja": {
                "sub_category": null,
                "repository_name": "Winograd-Schema-Challenge-Ja",
                "user_name": "ku-nlp",
                "description": "Japanese Translation of Winograd Schema Challenge"
            },
            "https://github.com/ku-nlp/speechBSD": {
                "sub_category": null,
                "repository_name": "speechBSD",
                "user_name": "ku-nlp",
                "description": "An extension of the BSD corpus with audio and speaker attribute information"
            },
            "https://github.com/mmorise/ita-corpus": {
                "sub_category": null,
                "repository_name": "ita-corpus",
                "user_name": "mmorise",
                "description": "ITAコーパスの文章リスト"
            },
            "https://github.com/mmorise/rohan4600": {
                "sub_category": null,
                "repository_name": "rohan4600",
                "user_name": "mmorise",
                "description": "モーラバランス型日本語コーパス"
            },
            "https://github.com/whym/anlp-jp-history": {
                "sub_category": null,
                "repository_name": "anlp-jp-history",
                "user_name": "whym",
                "description": "言語処理学会年次大会講演の全リスト・機械可読版など"
            },
            "https://github.com/cl-tohoku/keigo_transfer_task": {
                "sub_category": null,
                "repository_name": "keigo_transfer_task",
                "user_name": "cl-tohoku",
                "description": "敬語変換タスクにおける評価用データセット"
            },
            "https://github.com/jamesohortle/loanwords_gairaigo": {
                "sub_category": null,
                "repository_name": "loanwords_gairaigo",
                "user_name": "jamesohortle",
                "description": "English loanwords in Japanese"
            },
            "https://github.com/wikiwikification/jawikicorpus": {
                "sub_category": null,
                "repository_name": "jawikicorpus",
                "user_name": "wikiwikification",
                "description": "Japanese-Wikipedia Wikification Corpus"
            },
            "https://github.com/yuukimiyo/GeneralPolicySpeechOfPrimeMinisterOfJapan": {
                "sub_category": null,
                "repository_name": "GeneralPolicySpeechOfPrimeMinisterOfJapan",
                "user_name": "yuukimiyo",
                "description": "This is the corpus of Japanese Text that general policy speech of prime minister of Japan"
            },
            "https://github.com/ids-cv/wrime": {
                "sub_category": null,
                "repository_name": "wrime",
                "user_name": "ids-cv",
                "description": "WRIME: 主観と客観の感情分析データセット"
            },
            "https://github.com/sarulab-speech/jtubespeech": {
                "sub_category": null,
                "repository_name": "jtubespeech",
                "user_name": "sarulab-speech",
                "description": "JTubeSpeech: Corpus of Japanese speech collected from YouTube"
            },
            "https://github.com/maeda6uiui-backup/WikipediaWordFrequencyList": {
                "sub_category": null,
                "repository_name": "WikipediaWordFrequencyList",
                "user_name": "maeda6uiui-backup",
                "description": "日本語Wikipediaで使用される頻出単語のリスト"
            },
            "https://github.com/rindybell/kokkosho_data": {
                "sub_category": null,
                "repository_name": "kokkosho_data",
                "user_name": "rindybell",
                "description": "車両不具合情報に関するデータセット"
            },
            "https://github.com/ndl-lab/pdmocrdataset-part1": {
                "sub_category": null,
                "repository_name": "pdmocrdataset-part1",
                "user_name": "ndl-lab",
                "description": "デジタル化資料OCRテキスト化事業において作成されたOCR学習用データセット"
            },
            "https://github.com/ndl-lab/huriganacorpus-ndlbib": {
                "sub_category": null,
                "repository_name": "huriganacorpus-ndlbib",
                "user_name": "ndl-lab",
                "description": "全国書誌データから作成した振り仮名のデータセット"
            },
            "https://github.com/Hiroshiba/jvs_hiho": {
                "sub_category": null,
                "repository_name": "jvs_hiho",
                "user_name": "Hiroshiba",
                "description": "JVS (Japanese versatile speech) コーパスの自作のラベル"
            },
            "https://github.com/po3rin/hirakanadic": {
                "sub_category": null,
                "repository_name": "hirakanadic",
                "user_name": "po3rin",
                "description": "Allows Sudachi to normalize from hiragana to katakana from any compound word list"
            },
            "https://github.com/anilogia/animedb": {
                "sub_category": null,
                "repository_name": "animedb",
                "user_name": "anilogia",
                "description": "約100年に渡るアニメ作品リストデータベース"
            },
            "https://github.com/SaitoLab/security_words": {
                "sub_category": null,
                "repository_name": "security_words",
                "user_name": "SaitoLab",
                "description": "サイバーセキュリティに関連する公的な組織の日英対応"
            },
            "https://github.com/sugi2000/Data-on-Japanese-Diet-Members": {
                "sub_category": null,
                "repository_name": "Data-on-Japanese-Diet-Members",
                "user_name": "sugi2000",
                "description": "日本の国会議員のデータ"
            },
            "https://github.com/yuta1984/honkoku-data": {
                "sub_category": null,
                "repository_name": "honkoku-data",
                "user_name": "yuta1984",
                "description": "歴史資料の市民参加型翻刻プラットフォーム「みんなで翻刻」のテキストデータ置き場です。 / Transcription texts created on Minna de Honkoku (https://honkoku.org), a crowdsourced transcription platform for historical Japanese documents."
            },
            "https://github.com/Katsumata420/wikihow_japanese": {
                "sub_category": null,
                "repository_name": "wikihow_japanese",
                "user_name": "Katsumata420",
                "description": "wikiHow dataset (Japanese version)"
            },
            "https://github.com/mercari/engineer-vocabulary-list": {
                "sub_category": null,
                "repository_name": "engineer-vocabulary-list",
                "user_name": "mercari",
                "description": "Engineer Vocabulary List in Japanese/English"
            },
            "https://github.com/verypluming/JSICK": {
                "sub_category": null,
                "repository_name": "JSICK",
                "user_name": "verypluming",
                "description": "Japanese Sentences Involving Compositional Knowledge (JSICK) Dataset/JSICK-stress Test Set"
            },
            "https://github.com/JPCERTCC/phishurl-list": {
                "sub_category": null,
                "repository_name": "phishurl-list",
                "user_name": "JPCERTCC",
                "description": "Phishing URL dataset from JPCERT/CC"
            },
            "https://github.com/shigashiyama/jcms": {
                "sub_category": null,
                "repository_name": "jcms",
                "user_name": "shigashiyama",
                "description": "A Japanese Corpus of Many Specialized Domains (JCMS)"
            },
            "https://github.com/aozorahack/aozorabunko_text": {
                "sub_category": null,
                "repository_name": "aozorabunko_text",
                "user_name": "aozorahack",
                "description": "text-only archives of www.aozora.gr.jp"
            },
            "https://github.com/astremo/friendly_JA-Corpus": {
                "sub_category": null,
                "repository_name": "friendly_JA-Corpus",
                "user_name": "astremo",
                "description": "friendly_JA is a parallel Japanese-to-Japanese corpus aimed at making Japanese easier by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon"
            },
            "https://github.com/scriptin/topokanji": {
                "sub_category": null,
                "repository_name": "topokanji",
                "user_name": "scriptin",
                "description": "Topologically ordered lists of kanji for effective learning"
            },
            "https://github.com/uribo/isbn4groups": {
                "sub_category": null,
                "repository_name": "isbn4groups",
                "user_name": "uribo",
                "description": "ISBN-13における日本語での出版物 (978-4-XXXXXXXXX) に関するデータ等"
            },
            "https://github.com/komutan/NMeCab": {
                "sub_category": null,
                "repository_name": "NMeCab",
                "user_name": "komutan",
                "description": "NMeCab: About Japanese morphological analyzer on .NET"
            },
            "https://github.com/ndl-lab/ndlngramdata": {
                "sub_category": null,
                "repository_name": "ndlngramdata",
                "user_name": "ndl-lab",
                "description": "デジタル化資料から作成したOCRテキストデータのngram頻度統計情報のデータセット"
            },
            "https://github.com/ndl-lab/ndlngramviewer_v2": {
                "sub_category": null,
                "repository_name": "ndlngramviewer_v2",
                "user_name": "ndl-lab",
                "description": "2023年1月にリニューアルしたNDL Ngram Viewerのソースコード等一式"
            },
            "https://github.com/japanese-law-analysis/data_set": {
                "sub_category": null,
                "repository_name": "data_set",
                "user_name": "japanese-law-analysis",
                "description": "法律・判例関係のデータセット"
            },
            "https://github.com/shunk031/huggingface-datasets_wrime": {
                "sub_category": null,
                "repository_name": "huggingface-datasets_wrime",
                "user_name": "shunk031",
                "description": "WRIME for huggingface datasets"
            },
            "https://github.com/ndl-lab/ndl-minhon-ocrdataset": {
                "sub_category": null,
                "repository_name": "ndl-minhon-ocrdataset",
                "user_name": "ndl-lab",
                "description": "NDL古典籍OCR学習用データセット（みんなで翻刻加工データ）"
            },
            "https://github.com/AsPJT/PAX_SAPIENTICA": {
                "sub_category": null,
                "repository_name": "PAX_SAPIENTICA",
                "user_name": "AsPJT",
                "description": "GIS & Archaeological Simulator. 2023 in development."
            },
            "https://github.com/tasukuigarashi/j-liwc2015": {
                "sub_category": null,
                "repository_name": "j-liwc2015",
                "user_name": "tasukuigarashi",
                "description": "Japanese version of LIWC2015"
            },
            "https://github.com/shunk031/huggingface-datasets_livedoor-news-corpus": {
                "sub_category": null,
                "repository_name": "huggingface-datasets_livedoor-news-corpus",
                "user_name": "shunk031",
                "description": "Japanese Livedoor news corpus for huggingface datasets"
            },
            "https://github.com/shunk031/huggingface-datasets_JGLUE": {
                "sub_category": null,
                "repository_name": "huggingface-datasets_JGLUE",
                "user_name": "shunk031",
                "description": "JGLUE: Japanese General Language Understanding Evaluation for huggingface datasets"
            },
            "https://github.com/Language-Media-Lab/commonsense-moral-ja": {
                "sub_category": null,
                "repository_name": "commonsense-moral-ja",
                "user_name": "Language-Media-Lab",
                "description": "JCommonsenseMorality is a dataset created through crowdsourcing that reflects the commonsense morality of Japanese annotators."
            },
            "https://github.com/nlp-waseda/comet-atomic-ja": {
                "sub_category": null,
                "repository_name": "comet-atomic-ja",
                "user_name": "nlp-waseda",
                "description": "COMET-ATOMIC ja"
            },
            "https://github.com/nlp-waseda/dcsg-ja": {
                "sub_category": null,
                "repository_name": "dcsg-ja",
                "user_name": "nlp-waseda",
                "description": "Dialogue Commonsense Graph in Japanese"
            },
            "https://github.com/inspection-ai/japanese-toxic-dataset": {
                "sub_category": null,
                "repository_name": "japanese-toxic-dataset",
                "user_name": "inspection-ai",
                "description": "\"Proposal and Evaluation of Japanese Toxicity Schema\" provides a schema and dataset for toxicity in the Japanese language."
            },
            "https://github.com/CyberAgentAILab/camera": {
                "sub_category": null,
                "repository_name": "camera",
                "user_name": "CyberAgentAILab",
                "description": "CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) is the Japanese ad text generation dataset."
            },
            "https://github.com/tanreinama/Japanese-Fakenews-Dataset": {
                "sub_category": null,
                "repository_name": "Japanese-Fakenews-Dataset",
                "user_name": "tanreinama",
                "description": "日本語フェイクニュースデータセット"
            },
            "https://github.com/aiishii/jpn_explainable_qa_dataset": {
                "sub_category": null,
                "repository_name": "jpn_explainable_qa_dataset",
                "user_name": "aiishii",
                "description": "jpn_explainable_qa_dataset"
            },
            "https://github.com/nlp-titech/copa-japanese": {
                "sub_category": null,
                "repository_name": "copa-japanese",
                "user_name": "nlp-titech",
                "description": "COPA Dataset in Japanese"
            },
            "https://github.com/masayu-a/WLSP-familiarity": {
                "sub_category": null,
                "repository_name": "WLSP-familiarity",
                "user_name": "masayu-a",
                "description": "Word Familiarity Rate for 'Word List by Semantic Principles (WLSP)'"
            },
            "https://github.com/matbahasa/ProSub": {
                "sub_category": null,
                "repository_name": "ProSub",
                "user_name": "matbahasa",
                "description": "A cross-linguistic study of pronoun substitutes and address terms"
            },
            "https://github.com/nuko-yokohama/ramendb": {
                "sub_category": null,
                "repository_name": "ramendb",
                "user_name": "nuko-yokohama",
                "description": "なんとかデータベース( https://supleks.jp/ )からのスクレイピングツールと収集データ"
            },
            "https://github.com/shunk031/huggingface-datasets_CAMERA": {
                "sub_category": null,
                "repository_name": "huggingface-datasets_CAMERA",
                "user_name": "shunk031",
                "description": "CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) for huggingface datasets"
            },
            "https://github.com/nlp-waseda/FactCheckSentenceNLI-FCSNLI-": {
                "sub_category": null,
                "repository_name": "FactCheckSentenceNLI-FCSNLI-",
                "user_name": "nlp-waseda",
                "description": "FactCheckSentenceNLIデータセット"
            },
            "https://github.com/kunishou/databricks-dolly-15k-ja": {
                "sub_category": null,
                "repository_name": "databricks-dolly-15k-ja",
                "user_name": "kunishou",
                "description": "databricks/dolly-v2-12b の学習データに使用されたdatabricks-dolly-15k.jsonl を日本語に翻訳したデータセットになります。"
            },
            "https://github.com/ku-nlp/EaST-MELD": {
                "sub_category": null,
                "repository_name": "EaST-MELD",
                "user_name": "ku-nlp",
                "description": "EaST-MELD is an English-Japanese dataset for emotion-aware speech translation based on MELD."
            },
            "https://github.com/elith-co-jp/meconaudio": {
                "sub_category": null,
                "repository_name": "meconaudio",
                "user_name": "elith-co-jp",
                "description": "Mecon Audio(Medical Conference Audio)は厚生労働省主催の先進医療会議の議事録の読み上げデータセットです。"
            }
        },
        "Tutorial": {
            "https://github.com/yuibi/spacy_tutorial": {
                "sub_category": null,
                "repository_name": "spacy_tutorial",
                "user_name": "yuibi",
                "description": "spaCy tutorial in English and Japanese. spacy-transformers, BERT, GiNZA."
            },
            "https://github.com/icoxfog417/fastTextJapaneseTutorial": {
                "sub_category": null,
                "repository_name": "fastTextJapaneseTutorial",
                "user_name": "icoxfog417",
                "description": "Tutorial to train fastText with Japanese corpus"
            },
            "https://github.com/shunk031/allennlp-NER-ja": {
                "sub_category": null,
                "repository_name": "allennlp-NER-ja",
                "user_name": "shunk031",
                "description": "AllenNLP-NER-ja: AllenNLP による日本語を対象とした固有表現抽出"
            },
            "https://github.com/ymym3412/chariot-PyTorch-Japanese-text-classification": {
                "sub_category": null,
                "repository_name": "chariot-PyTorch-Japanese-text-classification",
                "user_name": "ymym3412",
                "description": "Experiment for Japanese Text classification using chariot and PyTorch"
            },
            "https://github.com/poyo46/ginza-examples": {
                "sub_category": null,
                "repository_name": "ginza-examples",
                "user_name": "poyo46",
                "description": "日本語NLPライブラリGiNZAのすゝめ"
            },
            "https://github.com/nekoumei/DocumentClassificationUsingBERT-Japanese": {
                "sub_category": null,
                "repository_name": "DocumentClassificationUsingBERT-Japanese",
                "user_name": "nekoumei",
                "description": "DocumentClassificationUsingBERT-Japanese"
            },
            "https://github.com/YutaroOgawa/BERT_Japanese_Google_Colaboratory": {
                "sub_category": null,
                "repository_name": "BERT_Japanese_Google_Colaboratory",
                "user_name": "YutaroOgawa",
                "description": "Google Colaboratoryで日本語のBERTを動かす方法です。"
            },
            "https://github.com/stockmarkteam/bert-book": {
                "sub_category": null,
                "repository_name": "bert-book",
                "user_name": "stockmarkteam",
                "description": "「BERTによる自然言語処理入門: Transformersを使った実践プログラミング」サポートページ"
            },
            "https://github.com/mocobeta/janome-tutorial": {
                "sub_category": null,
                "repository_name": "janome-tutorial",
                "user_name": "mocobeta",
                "description": "Janome を使ったテキストマイニング入門チュートリアルです。"
            },
            "https://github.com/hnishi/handson-language-models": {
                "sub_category": null,
                "repository_name": "handson-language-models",
                "user_name": "hnishi",
                "description": "日本語の言語モデルのハンズオン資料です"
            },
            "https://github.com/verypluming/JapaneseNLI": {
                "sub_category": null,
                "repository_name": "JapaneseNLI",
                "user_name": "verypluming",
                "description": "Google Colabで日本語テキスト推論を試す"
            },
            "https://github.com/Gin5050/deep-learning-with-pytorch-ja": {
                "sub_category": null,
                "repository_name": "deep-learning-with-pytorch-ja",
                "user_name": "Gin5050",
                "description": "deep-learning-with-pytorchの日本語版repositoryです。"
            }
        },
        "Research summary": {
            "https://github.com/himkt/awesome-bert-japanese": {
                "sub_category": null,
                "repository_name": "awesome-bert-japanese",
                "user_name": "himkt",
                "description": "A list of pre-trained BERT models for Japanese with word/subword tokenization + vocabulary construction algorithm information"
            },
            "https://github.com/gotutiyan/GEC-Info-ja": {
                "sub_category": null,
                "repository_name": "GEC-Info-ja",
                "user_name": "gotutiyan",
                "description": "文法誤り訂正に関する日本語文献を収集・分類するためのリポジトリ"
            },
            "https://github.com/ikegami-yukino/dataset-list": {
                "sub_category": null,
                "repository_name": "dataset-list",
                "user_name": "ikegami-yukino",
                "description": "lists of text corpus and more (mainly Japanese)"
            },
            "https://github.com/Valkyrja3607/tuning_playbook_ja": {
                "sub_category": null,
                "repository_name": "tuning_playbook_ja",
                "user_name": "Valkyrja3607",
                "description": "ディープラーニングモデルの性能を体系的に最大化するためのプレイブック"
            },
            "https://github.com/olety/japanese-pitch-accent-resources": {
                "sub_category": null,
                "repository_name": "japanese-pitch-accent-resources",
                "user_name": "olety",
                "description": "Trying to consolidate japanese phonetic, and in particular pitch accent resources into one list"
            }
        }
    }
}