Skip to content

Commit

Permalink
Merge pull request #9 from AnswerDotAI/dynamic_docs_dir
Browse files Browse the repository at this point in the history
allow saving of file into directory and clean up the code
  • Loading branch information
jph00 authored Sep 13, 2024
2 parents 5a26fba + dfb74ef commit 8055bdb
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 60 deletions.
2 changes: 1 addition & 1 deletion llms_txt/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
'git_url': 'https://github.com/AnswerDotAI/llms-txt',
'lib_path': 'llms_txt'},
'syms': { 'llms_txt.core': { 'llms_txt.core._doc': ('core.html#_doc', 'llms_txt/core.py'),
'llms_txt.core._local_docs_pth': ('core.html#_local_docs_pth', 'llms_txt/core.py'),
'llms_txt.core._parse_links': ('core.html#_parse_links', 'llms_txt/core.py'),
'llms_txt.core._parse_llms': ('core.html#_parse_llms', 'llms_txt/core.py'),
'llms_txt.core._section': ('core.html#_section', 'llms_txt/core.py'),
'llms_txt.core.create_ctx': ('core.html#create_ctx', 'llms_txt/core.py'),
'llms_txt.core.find_root_dir': ('core.html#find_root_dir', 'llms_txt/core.py'),
'llms_txt.core.get_doc_content': ('core.html#get_doc_content', 'llms_txt/core.py'),
'llms_txt.core.get_sizes': ('core.html#get_sizes', 'llms_txt/core.py'),
'llms_txt.core.llms_txt2ctx': ('core.html#llms_txt2ctx', 'llms_txt/core.py'),
Expand Down
39 changes: 16 additions & 23 deletions llms_txt/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_core.ipynb.

# %% auto 0
__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'find_root_dir', 'get_doc_content', 'mk_ctx',
'get_sizes', 'create_ctx', 'llms_txt2ctx']
__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'get_doc_content', 'mk_ctx', 'get_sizes',
'create_ctx', 'llms_txt2ctx']

# %% ../nbs/01_core.ipynb
import re
Expand All @@ -15,6 +15,7 @@
from fastcore.script import *
import httpx
from urllib.parse import urlparse
from nbdev.config import get_config

# %% ../nbs/01_core.ipynb
def opt_re(s):
Expand Down Expand Up @@ -68,28 +69,15 @@ def parse_llms_file(txt):
from fastcore.xml import Sections,Project,Doc

# %% ../nbs/01_core.ipynb
def find_root_dir():
    """Find the root directory of the nbdev project by looking for settings.ini.

    Starts at the current working directory and walks upward, returning the
    first directory that contains a `settings.ini` file. Returns `None` when
    neither the working directory nor any of its ancestors contains one.
    """
    cwd = Path.cwd()
    # `Path.parents` runs all the way up to (and includes) the filesystem root.
    # A `while path != path.parent` loop stops on reaching the root without
    # ever testing it, so iterate the explicit ancestor chain instead.
    for candidate in (cwd, *cwd.parents):
        if (candidate / 'settings.ini').exists(): return candidate
    return None
def _local_docs_pth(cfg): return cfg.config_path/'_proc'/cfg.doc_path.name

# %% ../nbs/01_core.ipynb
def get_doc_content(url):
"Fetch content from local file if in nbdev repo."
root_dir = find_root_dir()
if root_dir:
config = Config(root_dir, 'settings.ini')
doc_host = config.get('doc_host')
if doc_host and url.startswith(doc_host):
parsed_url = urlparse(url)
relative_path = parsed_url.path.lstrip('/')
local_path = root_dir / '_docs' / relative_path
if local_path.exists():
with open(local_path, 'r') as f: return f.read()
# If not a local file or file doesn't exist, fetch from URL
cfg = get_config()
if url.startswith(cfg.doc_host):
relative_path = urlparse(url).path.lstrip('/')
local_path = _local_docs_pth(cfg) / relative_path
if local_path.exists(): return local_path.read_text()
return httpx.get(url).text

# %% ../nbs/01_core.ipynb
Expand Down Expand Up @@ -131,7 +119,12 @@ def create_ctx(txt, optional=False, n_workers=None):
def llms_txt2ctx(
fname:str, # File name to read
optional:bool_arg=False, # Include 'optional' section?
n_workers:int=None # Number of threads to use for parallel downloading
n_workers:int=None, # Number of threads to use for parallel downloading
save_nbdev_fname:str=None #save output to nbdev `{docs_path}` instead of emitting to stdout
):
"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section."
print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))
ctx = create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers)
if save_nbdev_fname:
cfg = get_config()
(_local_docs_pth(cfg) / save_nbdev_fname).mk_write(ctx)
else: print(ctx)
65 changes: 30 additions & 35 deletions nbs/01_core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
"from fastcore.xml import *\n",
"from fastcore.script import *\n",
"import httpx\n",
"from urllib.parse import urlparse"
"from urllib.parse import urlparse\n",
"from nbdev.config import get_config"
]
},
{
Expand Down Expand Up @@ -185,7 +186,7 @@
{
"data": {
"text/plain": [
"{'title': 'FastHTML quick start'}"
"{'title': 'internal docs - ed'}"
]
},
"execution_count": null,
Expand Down Expand Up @@ -217,8 +218,7 @@
{
"data": {
"text/plain": [
"{'title': 'FastHTML quick start',\n",
" 'url': 'https://docs.fastht.ml/tutorials/quickstart_for_web_devs.html.md'}"
"{'title': 'internal docs - ed', 'url': 'https://llmstxt.org/ed.html'}"
]
},
"execution_count": null,
Expand Down Expand Up @@ -674,42 +674,22 @@
"from fastcore.xml import Sections,Project,Doc"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"def find_root_dir():\n",
" \"Find the root directory of the nbdev project by looking for settings.ini\"\n",
" path = Path.cwd()\n",
" while path != path.parent:\n",
" if (path / 'settings.ini').exists(): return path\n",
" path = path.parent\n",
" return None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#|export\n",
"def _local_docs_pth(cfg): return cfg.config_path/'_proc'/cfg.doc_path.name\n",
"\n",
"def get_doc_content(url):\n",
" \"Fetch content from local file if in nbdev repo.\"\n",
" root_dir = find_root_dir()\n",
" if root_dir:\n",
" config = Config(root_dir, 'settings.ini')\n",
" doc_host = config.get('doc_host')\n",
" if doc_host and url.startswith(doc_host):\n",
" parsed_url = urlparse(url)\n",
" relative_path = parsed_url.path.lstrip('/')\n",
" local_path = root_dir / '_docs' / relative_path\n",
" if local_path.exists():\n",
" with open(local_path, 'r') as f: return f.read()\n",
" # If not a local file or file doesn't exist, fetch from URL\n",
" cfg = get_config()\n",
" if url.startswith(cfg.doc_host):\n",
" relative_path = urlparse(url).path.lstrip('/')\n",
" local_path = _local_docs_pth(cfg) / relative_path\n",
" if local_path.exists(): return local_path.read_text()\n",
" return httpx.get(url).text"
]
},
Expand Down Expand Up @@ -797,7 +777,8 @@
{
"data": {
"text/plain": [
"{'docs': {'FastHTML quick start': 27376,\n",
"{'docs': {'internal docs - ed': 34464,\n",
" 'FastHTML quick start': 27376,\n",
" 'HTMX reference': 26427,\n",
" 'Starlette quick guide': 7936},\n",
" 'examples': {'Todo list application': 18558},\n",
Expand All @@ -821,7 +802,7 @@
{
"data": {
"text/plain": [
"129814"
"164321"
]
},
"execution_count": null,
Expand Down Expand Up @@ -858,10 +839,24 @@
"def llms_txt2ctx(\n",
" fname:str, # File name to read\n",
" optional:bool_arg=False, # Include 'optional' section?\n",
" n_workers:int=None # Number of threads to use for parallel downloading\n",
" n_workers:int=None, # Number of threads to use for parallel downloading\n",
" save_nbdev_fname:str=None #save output to nbdev `{docs_path}` instead of emitting to stdout\n",
"):\n",
" \"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section.\"\n",
" print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))"
" ctx = create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers)\n",
" if save_nbdev_fname:\n",
" cfg = get_config()\n",
" (_local_docs_pth(cfg) / save_nbdev_fname).mk_write(ctx)\n",
" else: print(ctx)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Path('/Users/hamel/github/fastcore/_docs/llms-ctx-full.txt').mk_write('hello')"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ keywords = nbdev jupyter notebook python LLMs NLP
language = English
status = 3
user = AnswerDotAI
requirements = fastcore>=1.7.3 httpx
requirements = fastcore>=1.7.3 httpx nbdev
conda_user = fastai
console_scripts = llms_txt2ctx=llms_txt.core:llms_txt2ctx
readme_nb = index.ipynb
Expand Down

0 comments on commit 8055bdb

Please sign in to comment.