Skip to content

Commit

Permalink
fixes #6
Browse files Browse the repository at this point in the history
  • Loading branch information
jph00 committed Sep 8, 2024
1 parent b9d2bff commit 0536f0c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 24 deletions.
23 changes: 13 additions & 10 deletions llms_txt/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def named_re(nm, pat):

def search(pat, txt, flags=0):
    """Dictionary of matched groups in `pat` within `txt`, or `None` if `pat` does not match.

    `flags` is passed straight through to `re.search`.
    """
    res = re.search(pat, txt, flags=flags)
    # `re.search` returns None on no match; guard so callers get None instead of an AttributeError.
    return res.groupdict() if res else None

# %% ../nbs/01_core.ipynb
def parse_link(txt):
Expand Down Expand Up @@ -65,22 +66,23 @@ def parse_llms_file(txt):
from fastcore.xml import Sections,Project,Doc

# %% ../nbs/01_core.ipynb
def _doc(kw):
    """Create a `Doc` FT object with the text retrieved from `kw['url']` as the child, and the rest of `kw` as attrs.

    Takes a single dict (rather than `**kw`) so it can be mapped directly over a list of items.
    `kw` itself is left unmodified. Raises `KeyError` if `kw` has no `'url'` entry.
    """
    # Read `url` without `pop`: popping would strip it from the caller's dict,
    # so a second pass over the same items would fail with KeyError.
    url = kw['url']
    attrs = {k: v for k, v in kw.items() if k != 'url'}
    re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)
    # Keep every line of the response except those that are entirely a one-line HTML comment.
    txt = [o for o in httpx.get(url).text.splitlines() if not re_comment.search(o)]
    return Doc('\n'.join(txt), **attrs)

# %% ../nbs/01_core.ipynb
def _section(nm, items, n_workers=None):
    """Create a section named `nm` containing a `Doc` object for each entry of `items`.

    Each entry is handed to `_doc` as a single argument via `parallel` with `threadpool=True`;
    `n_workers` is forwarded unchanged to `parallel`.
    """
    return ft(nm, *parallel(_doc, items, n_workers=n_workers, threadpool=True))

# %% ../nbs/01_core.ipynb
def mk_ctx(d, optional=True, n_workers=None):
    """Create a `Project` with a `Section` for each H2 part in `d`, optionally skipping the 'optional' section.

    `d` must provide `title`, `summary`, `info` and a `sections` mapping of name -> items.
    When `optional` is falsy, the section keyed exactly 'Optional' is left out.
    `n_workers` is forwarded to `_section` for each section that is built.
    """
    # With `optional` truthy the skip key is '', so only a section with an empty name would be excluded.
    skip = '' if optional else 'Optional'
    sections = [_section(k, v, n_workers=n_workers) for k,v in d.sections.items() if k!=skip]
    return Project(title=d.title, summary=d.summary)(d.info, *sections)

# %% ../nbs/01_core.ipynb
Expand All @@ -89,17 +91,18 @@ def get_sizes(ctx):
return {o.tag:{p.title:len(p.children[0]) for p in o.children} for o in ctx.children if hasattr(o,'tag')}

# %% ../nbs/01_core.ipynb
def create_ctx(txt, optional=False, n_workers=None):
    """A `Project` with a `Section` for each H2 part in `txt`, optionally skipping the 'optional' section.

    `txt` is parsed with `parse_llms_file`, turned into a context by `mk_ctx` (which receives
    `optional` and `n_workers`), and returned as the result of `to_xml(..., do_escape=False)`.
    """
    d = parse_llms_file(txt)
    ctx = mk_ctx(d, optional=optional, n_workers=n_workers)
    return to_xml(ctx, do_escape=False)

# %% ../nbs/01_core.ipynb
@call_parse
def llms_txt2ctx(
    fname:str, # File name to read
    optional:bool_arg=False, # Include 'optional' section?
    n_workers:int=None # Number of threads to use for parallel downloading
):
    "Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section."
    # Read the whole file as text and print the context built from it by `create_ctx`.
    print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))
29 changes: 16 additions & 13 deletions nbs/01_core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@
"\n",
"def search(pat, txt, flags=0):\n",
" \"Dictionary of matched groups in `pat` within `txt`\"\n",
" return re.search(pat, txt, flags=flags).groupdict()"
" res = re.search(pat, txt, flags=flags)\n",
" return res.groupdict() if res else None"
]
},
{
Expand Down Expand Up @@ -679,8 +680,9 @@
"outputs": [],
"source": [
"#| export\n",
"def _doc(url, **kw):\n",
"def _doc(kw):\n",
" \"Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs.\"\n",
" url = kw.pop('url')\n",
" re_comment = re.compile('^<!--.*-->$', flags=re.MULTILINE)\n",
" txt = [o for o in httpx.get(url).text.splitlines() if not re_comment.search(o)]\n",
" return Doc('\\n'.join(txt), **kw)"
Expand All @@ -693,9 +695,9 @@
"outputs": [],
"source": [
"#| export\n",
"def _section(nm, items):\n",
"def _section(nm, items, n_workers=None):\n",
" \"Create a section containing a `Doc` object for each child.\"\n",
" return ft(nm, *[_doc(**o) for o in items])"
" return ft(nm, *parallel(_doc, items, n_workers=n_workers, threadpool=True))"
]
},
{
Expand All @@ -705,10 +707,10 @@
"outputs": [],
"source": [
"#| export\n",
"def mk_ctx(d, optional=True):\n",
"def mk_ctx(d, optional=True, n_workers=None):\n",
" \"Create a `Project` with a `Section` for each H2 part in `d`, optionally skipping the 'optional' section.\"\n",
" skip = '' if optional else 'Optional'\n",
" sections = [_section(k, v) for k,v in d.sections.items() if k!=skip]\n",
" sections = [_section(k, v, n_workers=n_workers) for k,v in d.sections.items() if k!=skip]\n",
" return Project(title=d.title, summary=d.summary)(d.info, *sections)"
]
},
Expand Down Expand Up @@ -753,10 +755,10 @@
{
"data": {
"text/plain": [
"{'docs': {'FastHTML quick start': 25803,\n",
"{'docs': {'FastHTML quick start': 27376,\n",
" 'HTMX reference': 26427,\n",
" 'Starlette quick guide': 7936},\n",
" 'examples': {'Todo list application': 18588},\n",
" 'examples': {'Todo list application': 18558},\n",
" 'optional': {'Starlette full documentation': 48331}}"
]
},
Expand All @@ -777,7 +779,7 @@
{
"data": {
"text/plain": [
"128271"
"129814"
]
},
"execution_count": null,
Expand All @@ -796,10 +798,10 @@
"outputs": [],
"source": [
"#| export\n",
"def create_ctx(txt, optional=False):\n",
"def create_ctx(txt, optional=False, n_workers=None):\n",
" \"A `Project` with a `Section` for each H2 part in `txt`, optionally skipping the 'optional' section.\"\n",
" d = parse_llms_file(txt)\n",
" ctx = mk_ctx(d, optional=optional)\n",
" ctx = mk_ctx(d, optional=optional, n_workers=n_workers)\n",
" return to_xml(ctx, do_escape=False)"
]
},
Expand All @@ -813,10 +815,11 @@
"@call_parse\n",
"def llms_txt2ctx(\n",
" fname:str, # File name to read\n",
" optional:bool_arg=False # Include 'optional' section?\n",
" optional:bool_arg=False, # Include 'optional' section?\n",
" n_workers:int=None # Number of threads to use for parallel downloading\n",
"):\n",
" \"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section.\"\n",
" print(create_ctx(Path(fname).read_text(), optional=optional))"
" print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion nbs/llms.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

## Docs

- [llms.txt proposal](https://llmstxt.org/index-commonmark.md): The proposal for llms.txt
- [llms.txt proposal](https://llmstxt.org/index.md): The proposal for llms.txt
- [Python library docs](https://llmstxt.org/intro.html.md): Docs for `llms-txt` python lib
- [ed demo](https://llmstxt.org/ed-commonmark.md): Tongue-in-cheek example of how llms.txt could be used in the classic `ed` editor, used to show how editors could incorporate llms.txt in general.

0 comments on commit 0536f0c

Please sign in to comment.