diff --git a/src/llm_datasets/datasets/bg/bgnc_admin_eur.py b/src/llm_datasets/datasets/bg/bgnc_admin_eur.py
deleted file mode 100644
index 879ab71..0000000
--- a/src/llm_datasets/datasets/bg/bgnc_admin_eur.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import logging
-
-from tqdm.auto import tqdm
-
-from llm_datasets.datasets.base import MB, Availability, BaseDataset
-
-logger = logging.getLogger(__name__)
-
-
-# deprecated -> use bulnc instead!
-class BGNCAdminEURDataset(BaseDataset):
-    """Part of Bulgarian National Corpus
-
-    TODO overlap with eurlex_bg?
-    """
-
-    DATASET_ID = "bgnc_admin_eur"
-    TITLE = "ADMIN_EUR Corpus of EU legislation (bg)"
-    HOMEPAGE = "https://eur-lex.europa.eu/homepage.html"
-
-    AVAILIBILITY = Availability.DIRECT_DOWNLOAD
-
-    LANGUAGES = ["bg"]
-    HAS_OVERLAP_WITH = [
-        "bulnc",
-    ]
-    DOWNLOAD_URLS = ["https://dcl.bas.bg/BulNC-registration/dl.php?dl=feeds/ADMIN_EUR.BG.zip"]
-
-    BYTES = 257 * MB
-
-    def download(self):
-        """DOWNLOAD
-        -----------
-
-        Instruction
-
-        - Downloaded locally by clicking on the download link in the browser:
-
-        https://dcl.bas.bg/BulNC-registration/dl.php?dl=feeds/ADMIN_EUR.BG.zip
-
-        - Copy local file to server:
-
-        scp /Local/Path/to/ADMIN_EUR.BG.zip username@clustername:/data/datasets/ele/bg/BgNC/admin_eur
-
-        - Extract files:
-
-        unzip ADMIN_EUR.BG.zip
-
-        """
-        pass
-
-    def decompress(self):
-        # unzip ADMIN_EUR.BG.zip
-        pass
-
-    def get_texts(self):
-        files_path = self.get_dataset_file_paths(subdirectories=True, needed_suffix=".txt")
-
-        logger.info(f"Found {len(files_path):,} files")
-
-        for input_file in tqdm(files_path, desc="Reading files"):
-            # skip if is metadata
-            if "METADATA" in input_file:
-                logger.warning(f"Skip {input_file}")
-                continue
-
-            # each file is one documentt
-            with open(input_file, "r") as inp:
-                text = inp.read()
-                yield text.strip()
diff --git a/src/llm_datasets/datasets/bg/bgnc_news_corpus.py b/src/llm_datasets/datasets/bg/bgnc_news_corpus.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/llm_datasets/datasets/bg/bulgarian_news.py b/src/llm_datasets/datasets/bg/bulgarian_news.py
deleted file mode 100644
index 8dba995..0000000
--- a/src/llm_datasets/datasets/bg/bulgarian_news.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import json
-import logging
-from pathlib import Path
-
-from llm_datasets.datasets.base import MB, Availability, BaseDataset, License
-
-logger = logging.getLogger(__name__)
-
-
-class BulgarianNewsDataset(BaseDataset):
-    DATASET_ID = "bulgarian_news"
-    TITLE = "Crawl of Bulgarian news websites"
-    DOWNLOAD_URLS = ["http://old.dcl.bas.bg/dataset/Bulgarian_news.7z"]
-    DESCRIPTION = (
-        "The collection was collected by crawling Bulgarian websites in Bulgarian. Text samples are in json format. We"
-        " can provide raw tests."
-    )
-    WEB_CRAWLED = True
-    LANGUAGES = ["bg"]
-    BYTES = 919 * MB
-    AVAILIBILITY = Availability.ON_REQUEST
-    LICENSE = License("research only")
-
-    def decompress(self):
-        # 7z x Bulgarian_news.7z
-        pass
-
-    def get_texts(self):
-        # read from extracted JSON files
-        for i, file_path in enumerate(Path(self.get_local_dataset_dir()).rglob("*.json")):
-            if self.skip_items > 0 and i < self.skip_items:
-                continue
-
-            with open(file_path) as f:
-                try:
-                    doc = json.load(f)
-                    if "bg_a_text" in doc:
-                        text = self.paragraph_delimiter.join(doc["bg_a_text"])
-                        yield text
-                    else:
-                        logger.warning("JSON has no text field: %s", file_path)
-
-                except ValueError:
-                    logger.error("Cannot parse JSON from %s", file_path)
diff --git a/src/llm_datasets/datasets/bg/bulnc.py b/src/llm_datasets/datasets/bg/bulnc.py
deleted file mode 100644
index 6e91f5c..0000000
--- a/src/llm_datasets/datasets/bg/bulnc.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import logging
-from pathlib import Path
-
-from llm_datasets.datasets.base import GB, Availability, BaseDataset, License
-
-logger = logging.getLogger(__name__)
-
-
-class BulNCDataset(BaseDataset):
-    DATASET_ID = "bulnc"
-    TITLE = "Bulgarian National Corpus"
-    AVAILIBILITY = Availability.ON_REQUEST
-    DOWNLOAD_URLS = ["http://old.dcl.bas.bg/dataset/BulNC.7z"]  # password-protected file!
-    DESCRIPTION = (
-        "The Bulgarian National Corpus contains a wide range of texts in various sizes, media types (written and "
-        "spoken), styles, periods (synchronic and diachronic), and licenses. Each text in the collection is supplied "
-        "with metadata. The Bulgarian National Corpus  was first compiled using the Bulgarian Lexicographic Archive "
-        "and the Text Archive of Written Bulgarian, which account for 55.95% of the corpus. Later, the EMEA corpus "
-        "(medical administrative texts) and the OpenSubtitles corpus (film subtitles) were added, accounting for "
-        "1.27% and 8.61% of the BulNC, respectively. The remaining texts were crawled automatically and include a "
-        "large number of administrative texts, news from monolingual and multilingual sources, scientific texts, and "
-        "popular science. The BulNC is not fully downloadable due to the inclusion of copyrighted material. We've "
-        "provided a link to a password-protected archive for evaluation."
-    )
-    AVAILIBILITY = Availability.ON_REQUEST
-    LICENSE = License("research only", sharealike=False)
-    LANGUAGES = ["bg"]
-    BYTES = 1.8 * GB
-
-    def decompress(self):
-        """7z x BulNC.7z
-
-        Folders: 125
-        Files: 256906
-        Size:       13279357395
-        Compressed: 1981942477
-        """
-        pass
-
-    def get_texts(self):
-        # read from extracted TXT files
-        for file_path in Path(self.get_local_dataset_dir()).rglob(
-            "*.txt"
-        ):  # self.get_dataset_file_paths(subdirectories=True, needed_suffix=".txt"):
-            with open(file_path) as f:
-                text = f.read()
-
-                yield text
diff --git a/src/llm_datasets/datasets/dataset_registry.py b/src/llm_datasets/datasets/dataset_registry.py
index 1a237a0..aec6d43 100644
--- a/src/llm_datasets/datasets/dataset_registry.py
+++ b/src/llm_datasets/datasets/dataset_registry.py
@@ -76,11 +76,7 @@
     ".en.pile_of_law.PileOfLawDataset",
     ".en.math_amps.MathAMPSDataset",
     ".en.edgar.EdgarCorpus",
-    # bg
-    # ".bg.bgnc_admin_eur.BGNCAdminEURDataset",  # deprecated -> use bulnc
-    # ".bg.bgnc_news_corpus.BGNCNewsCorpusDataset",  # deprecated -> use bulnc
-    ".bg.bulgarian_news.BulgarianNewsDataset",
-    ".bg.bulnc.BulNCDataset",
+    # bg: all Bulgarian datasets have been removed from the registry
     # de
     ".de.openlegaldata.OpenLegalDataDataset",
     ".de.dewac.DEWacDataset",