diff --git a/llms_txt/_modidx.py b/llms_txt/_modidx.py index d5bd669..9c49179 100644 --- a/llms_txt/_modidx.py +++ b/llms_txt/_modidx.py @@ -6,11 +6,11 @@ 'git_url': 'https://github.com/AnswerDotAI/llms-txt', 'lib_path': 'llms_txt'}, 'syms': { 'llms_txt.core': { 'llms_txt.core._doc': ('core.html#_doc', 'llms_txt/core.py'), + 'llms_txt.core._local_docs_pth': ('core.html#_local_docs_pth', 'llms_txt/core.py'), 'llms_txt.core._parse_links': ('core.html#_parse_links', 'llms_txt/core.py'), 'llms_txt.core._parse_llms': ('core.html#_parse_llms', 'llms_txt/core.py'), 'llms_txt.core._section': ('core.html#_section', 'llms_txt/core.py'), 'llms_txt.core.create_ctx': ('core.html#create_ctx', 'llms_txt/core.py'), - 'llms_txt.core.find_root_dir': ('core.html#find_root_dir', 'llms_txt/core.py'), 'llms_txt.core.get_doc_content': ('core.html#get_doc_content', 'llms_txt/core.py'), 'llms_txt.core.get_sizes': ('core.html#get_sizes', 'llms_txt/core.py'), 'llms_txt.core.llms_txt2ctx': ('core.html#llms_txt2ctx', 'llms_txt/core.py'), diff --git a/llms_txt/core.py b/llms_txt/core.py index fd37dac..e8a43bc 100644 --- a/llms_txt/core.py +++ b/llms_txt/core.py @@ -3,8 +3,8 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_core.ipynb. # %% auto 0 -__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'find_root_dir', 'get_doc_content', 'mk_ctx', - 'get_sizes', 'create_ctx', 'llms_txt2ctx'] +__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'get_doc_content', 'mk_ctx', 'get_sizes', + 'create_ctx', 'llms_txt2ctx'] # %% ../nbs/01_core.ipynb import re @@ -15,6 +15,7 @@ from fastcore.script import * import httpx from urllib.parse import urlparse +from nbdev.config import get_config # %% ../nbs/01_core.ipynb def opt_re(s): @@ -68,28 +69,15 @@ def parse_llms_file(txt): from fastcore.xml import Sections,Project,Doc # %% ../nbs/01_core.ipynb -def find_root_dir(): - "Find the root directory of the nbdev project by looking for settings.ini" - path = Path.cwd() - while path != path.parent: - if (path / 'settings.ini').exists(): return path - path = path.parent - return None +def _local_docs_pth(cfg): return cfg.config_path/'_proc'/cfg.doc_path.name -# %% ../nbs/01_core.ipynb def get_doc_content(url): "Fetch content from local file if in nbdev repo." - root_dir = find_root_dir() - if root_dir: - config = Config(root_dir, 'settings.ini') - doc_host = config.get('doc_host') - if doc_host and url.startswith(doc_host): - parsed_url = urlparse(url) - relative_path = parsed_url.path.lstrip('/') - local_path = root_dir / '_docs' / relative_path - if local_path.exists(): - with open(local_path, 'r') as f: return f.read() - # If not a local file or file doesn't exist, fetch from URL + cfg = get_config() + if url.startswith(cfg.doc_host): + relative_path = urlparse(url).path.lstrip('/') + local_path = _local_docs_pth(cfg) / relative_path + if local_path.exists(): return local_path.read_text() return httpx.get(url).text # %% ../nbs/01_core.ipynb @@ -131,7 +119,12 @@ def create_ctx(txt, optional=False, n_workers=None): def llms_txt2ctx( fname:str, # File name to read optional:bool_arg=False, # Include 'optional' section? - n_workers:int=None # Number of threads to use for parallel downloading + n_workers:int=None, # Number of threads to use for parallel downloading + save_nbdev_fname:str=None #save output to nbdev `{docs_path}` instead of emitting to stdout ): "Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section." - print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers)) + ctx = create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers) + if save_nbdev_fname: + cfg = get_config() + (_local_docs_pth(cfg) / save_nbdev_fname).mk_write(ctx) + else: print(ctx) diff --git a/nbs/01_core.ipynb b/nbs/01_core.ipynb index c0aefc1..c65c9fa 100644 --- a/nbs/01_core.ipynb +++ b/nbs/01_core.ipynb @@ -50,7 +50,8 @@ "from fastcore.xml import *\n", "from fastcore.script import *\n", "import httpx\n", - "from urllib.parse import urlparse" + "from urllib.parse import urlparse\n", + "from nbdev.config import get_config" ] }, { @@ -185,7 +186,7 @@ { "data": { "text/plain": [ - "{'title': 'FastHTML quick start'}" + "{'title': 'internal docs - ed'}" ] }, "execution_count": null, @@ -217,8 +218,7 @@ { "data": { "text/plain": [ - "{'title': 'FastHTML quick start',\n", - " 'url': 'https://docs.fastht.ml/tutorials/quickstart_for_web_devs.html.md'}" + "{'title': 'internal docs - ed', 'url': 'https://llmstxt.org/ed.html'}" ] }, "execution_count": null, @@ -674,22 +674,6 @@ "from fastcore.xml import Sections,Project,Doc" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def find_root_dir():\n", - " \"Find the root directory of the nbdev project by looking for settings.ini\"\n", - " path = Path.cwd()\n", - " while path != path.parent:\n", - " if (path / 'settings.ini').exists(): return path\n", - " path = path.parent\n", - " return None" - ] - }, { "cell_type": "code", "execution_count": null, @@ -697,19 +681,15 @@ "outputs": [], "source": [ "#|export\n", + "def _local_docs_pth(cfg): return cfg.config_path/'_proc'/cfg.doc_path.name\n", + "\n", "def get_doc_content(url):\n", " \"Fetch content from local file if in nbdev repo.\"\n", - " root_dir = find_root_dir()\n", - " if root_dir:\n", - " config = Config(root_dir, 'settings.ini')\n", - " doc_host = config.get('doc_host')\n", - " if doc_host and url.startswith(doc_host):\n", - " parsed_url = urlparse(url)\n", - " relative_path = parsed_url.path.lstrip('/')\n", - " local_path = root_dir / '_docs' / relative_path\n", - " if local_path.exists():\n", - " with open(local_path, 'r') as f: return f.read()\n", - " # If not a local file or file doesn't exist, fetch from URL\n", + " cfg = get_config()\n", + " if url.startswith(cfg.doc_host):\n", + " relative_path = urlparse(url).path.lstrip('/')\n", + " local_path = _local_docs_pth(cfg) / relative_path\n", + " if local_path.exists(): return local_path.read_text()\n", " return httpx.get(url).text" ] }, @@ -797,7 +777,8 @@ { "data": { "text/plain": [ - "{'docs': {'FastHTML quick start': 27376,\n", + "{'docs': {'internal docs - ed': 34464,\n", + " 'FastHTML quick start': 27376,\n", " 'HTMX reference': 26427,\n", " 'Starlette quick guide': 7936},\n", " 'examples': {'Todo list application': 18558},\n", @@ -821,7 +802,7 @@ { "data": { "text/plain": [ - "129814" + "164321" ] }, "execution_count": null, @@ -858,10 +839,24 @@ "def llms_txt2ctx(\n", " fname:str, # File name to read\n", " optional:bool_arg=False, # Include 'optional' section?\n", - " n_workers:int=None # Number of threads to use for parallel downloading\n", + " n_workers:int=None, # Number of threads to use for parallel downloading\n", + " save_nbdev_fname:str=None #save output to nbdev `{docs_path}` instead of emitting to stdout\n", "):\n", " \"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section.\"\n", - " print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))" + " ctx = create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers)\n", + " if save_nbdev_fname:\n", + " cfg = get_config()\n", + " (_local_docs_pth(cfg) / save_nbdev_fname).mk_write(ctx)\n", + " else: print(ctx)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Path('/Users/hamel/github/fastcore/_docs/llms-ctx-full.txt').mk_write('hello')" ] }, { diff --git a/settings.ini b/settings.ini index 0ad178f..6764eba 100644 --- a/settings.ini +++ b/settings.ini @@ -27,7 +27,7 @@ keywords = nbdev jupyter notebook python LLMs NLP language = English status = 3 user = AnswerDotAI -requirements = fastcore>=1.7.3 httpx +requirements = fastcore>=1.7.3 httpx nbdev conda_user = fastai console_scripts = llms_txt2ctx=llms_txt.core:llms_txt2ctx readme_nb = index.ipynb