Skip to content

Commit

Permalink
Merge pull request #24 from bigdata-ustc/docs
Browse files Browse the repository at this point in the history
[BUILD] Initialization of the autodoc using sphinx
  • Loading branch information
tswsxk authored Aug 8, 2021
2 parents 65459a4 + 3bff51d commit 336fe38
Show file tree
Hide file tree
Showing 32 changed files with 577 additions and 55 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip install -e .[test]
pip install -e .[test,full]
pip install codecov
- name: Test with pytest
run: |
Expand Down
28 changes: 28 additions & 0 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/source/conf.py

# Build documentation with MkDocs
#mkdocs:
# configuration: mkdocs.yml

# Optionally build your docs in additional formats such as PDF and ePub
formats: []

# Optionally set the version of Python and requirements
# required to build your docs
python:
version: 3.7
install:
- requirements: docs/requirements.txt
- method: pip
path: .
extra_requirements:
- full
17 changes: 10 additions & 7 deletions EduNLP/Formula/ast/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,18 @@ def ast(formula: (str, List[Dict]), index=0, forest_begin=0, father_tree=None, i
Notes
----------
Some functions are not supportd in katex
eg :
Some functions are not supported in ``katex``
e.g.,
1. tag
'\\begin{equation} \\tag{tagName} F=ma \\end{equation}'
'\\begin{align} \\tag{1} y=x+z \\end{align}'
'\\tag*{hi} x+y^{2x}'
- ``\\begin{equation} \\tag{tagName} F=ma \\end{equation}``
- ``\\begin{align} \\tag{1} y=x+z \\end{align}``
- ``\\tag*{hi} x+y^{2x}``
2. dddot
'\\frac{ \\dddot y }{ x }'
3. see other: https://github.com/KaTeX/KaTeX/blob/master/docs/support_table.md
- ``\\frac{ \\dddot y }{ x }``
For more information, refer to
`katex support table <https://github.com/KaTeX/KaTeX/blob/master/docs/support_table.md>`_
"""
tree = []
index += forest_begin
Expand Down
45 changes: 29 additions & 16 deletions EduNLP/I2V/i2v.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,24 @@


class I2V(object):
"""
Parameters
----------
tokenizer: str
the tokenizer name
t2v: str
the name of token2vector model
args:
the parameters passed to t2v
tokenizer_kwargs: dict
the parameters passed to tokenizer
pretrained_t2v: bool
kwargs:
the parameters passed to t2v
"""
def __init__(self, tokenizer, t2v, *args, tokenizer_kwargs: dict = None, pretrained_t2v=False, **kwargs):
"""
Parameters
----------
tokenizer: str
the tokenizer name
t2v: str
the name of token2vector model
args:
the parameters passed to t2v
tokenizer_kwargs: dict
the parameters passed to tokenizer
pretrained_t2v: bool
kwargs:
the parameters passed to t2v
"""

self.tokenizer: Tokenizer = get_tokenizer(tokenizer, **tokenizer_kwargs if tokenizer_kwargs is not None else {})
if pretrained_t2v:
logger.info("Use pretrained t2v model %s" % t2v)
Expand Down Expand Up @@ -101,6 +102,18 @@ def from_pretrained(cls, name, model_dir=MODEL_DIR, *args, **kwargs):


def get_pretrained_i2v(name, model_dir=MODEL_DIR):
"""
Parameters
----------
name
model_dir
Returns
-------
i2v model: I2V
"""
if name not in MODELS:
raise KeyError(
"Unknown model name %s, use one of the provided models: %s" % (name, ", ".join(MODELS.keys()))
Expand Down
2 changes: 2 additions & 0 deletions EduNLP/SIF/sif.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from .tokenization import tokenize, link_formulas
from .parser import Parser

__all__ = ["is_sif", "to_sif", "sif4sci"]


def is_sif(item):
r"""
Expand Down
2 changes: 1 addition & 1 deletion EduNLP/SIF/tokenization/text/stopwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from EduNLP.utils import abs_current_dir, path_append

DEFAULT_FILEPATH = os.path.abspath(
path_append(abs_current_dir(__file__), "..", "..", "..", "..", "meta_data", "sif_stopwords.txt")
path_append(abs_current_dir(__file__), "..", "..", "..", "meta_data", "sif_stopwords.txt")
)


Expand Down
5 changes: 3 additions & 2 deletions EduNLP/SIF/tokenization/text/tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
jieba.setLogLevel(logging.INFO)


def tokenize(text, granularity="word", stopwords=DEFAULT_STOPWORDS):
def tokenize(text, granularity="word", stopwords="default"):
"""
Parameters
----------
text
granularity
stopwords
stopwords: str, None or set
Returns
-------
Expand All @@ -26,6 +26,7 @@ def tokenize(text, granularity="word", stopwords=DEFAULT_STOPWORDS):
>>> tokenize("三角函数是基本初等函数之一", granularity="char")
['三', '角', '函', '数', '基', '初', '函', '数']
"""
stopwords = DEFAULT_STOPWORDS if stopwords == "default" else stopwords
stopwords = stopwords if stopwords is not None else {}
if granularity == "word":
return [token for token in jieba.cut(text) if token not in stopwords and token.strip()]
Expand Down
5 changes: 5 additions & 0 deletions EduNLP/SIF/tokenization/tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@


class TokenList(object):
"""
Attributes
-------------
"""
def __init__(self, segment_list: SegmentList, text_params=None, formula_params=None, figure_params=None):
self._tokens = []
self._text_tokens = []
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
recursive-include EduNLP/meta_data *
35 changes: 20 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,49 @@
[![PyPI](https://img.shields.io/pypi/v/EduNLP.svg)](https://pypi.python.org/pypi/EduNLP)
[![test](https://github.com/bigdata-ustc/EduNLP/actions/workflows/python-test.yml/badge.svg?branch=master)](https://github.com/bigdata-ustc/EduNLP/actions/workflows/python-test.yml)
[![codecov](https://codecov.io/gh/bigdata-ustc/EduNLP/branch/master/graph/badge.svg?token=B7gscOGQLD)](https://codecov.io/gh/bigdata-ustc/EduNLP)
[![Documentation Status](https://readthedocs.org/projects/edunlp/badge/?version=latest)](https://edunlp.readthedocs.io/en/latest/?badge=latest)
[![Download](https://img.shields.io/pypi/dm/EduNLP.svg?style=flat)](https://pypi.python.org/pypi/EduNLP)
[![License](https://img.shields.io/github/license/bigdata-ustc/EduNLP)](LICENSE)
[![DOI](https://zenodo.org/badge/332661206.svg)](https://zenodo.org/badge/latestdoi/332661206)

NLP tools for Educational data (e.g., exercise, papers)

## Introduction
EduNLP is a library for advanced Natural Language Processing in Python and is one of the projects of EduX plan of BDAA. It's built on the very latest research, and was designed from day one to be used in real educational products.
EduNLP is a library for advanced Natural Language Processing in Python and is one of the projects of the [EduX](https://github.com/bigdata-ustc/EduX) plan of [BDAA](https://github.com/bigdata-ustc). It's built on the very latest research, and was designed from day one to be used in real educational products.

EduNLP now comes with pretrained pipelines and currently supports segmentation, tokenization and vectorization. It supports various kinds of preprocessing for NLP in educational scenarios, such as formula parsing and multi-modal segmentation.

EduNLP is commercial open-source software, released under the Apache-2.0 license.
EduNLP is commercial open-source software, released under the [Apache-2.0 license](LICENSE).

## Quickstart

### Installation

Clone the repository and install it with pip:
```
pip install -e .
``` sh
# basic installation
pip install .

# full installation
pip install .[full]
```
or install from pypi:
```
# basic installation
pip install EduNLP
```
### Resource
We will continously publish new datasets in [Standard Item Format (SIF)](https://github.com/bigdata-ustc/EduNLP/blob/master/docs/SIF4TI_CH.md) to encourage the relavant research works. The data resourses can be accessed via another EduX project [EduData](https://github.com/bigdata-ustc/EduData)
# full installation
pip install EduNLP[full]
```

### Tutorial

* Overview (TBA)
* [Formula Parsing](https://github.com/bigdata-ustc/EduNLP/blob/master/examples/formula/formula.ipynb)
* [Segment and Tokenization](https://github.com/bigdata-ustc/EduNLP/blob/master/examples/sif/sif.ipynb)
* [Vectorization](https://github.com/bigdata-ustc/EduNLP/tree/master/examples/pretrain)
* Pretrained Model (TBA)
For more details, please refer to the full documentation ([latest](https://edunlp.readthedocs.io/en/latest) | [stable](https://edunlp.readthedocs.io/en/stable)).

### Resource
We will continuously publish new datasets in [Standard Item Format (SIF)](https://github.com/bigdata-ustc/EduNLP/blob/master/docs/SIF4TI_CH.md) to encourage the relevant research works. The data resources can be accessed via another EduX project [EduData](https://github.com/bigdata-ustc/EduData)

## Contribute

EduNLP is still under development. More algorithms and features are going to be added and we always welcome contributions to help make EduNLP better. If you would like to contribute, please follow this [guideline](CONTRIBUTE.md).
EduNLP is still under development. More algorithms and features are going to be added and we always welcome contributions to help make EduNLP better. If you would like to contribute, please follow this [guideline](CONTRIBUTE.md)([开发指南](CONTRIBUTE_CH.md)).

## Citation

Expand Down
20 changes: 20 additions & 0 deletions docs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
29 changes: 29 additions & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
EduNLP document and tutorial folder
===================================

Requirements
------------
See the requirements `docs_deps` in `setup.py`:
```sh
pip install -e .[doc]
```


Build documents
---------------
First, clean up existing files:
```
make clean
```

Then build:
```
make html
```

Render locally
--------------
```
cd build/html
python3 -m http.server 8000
```
35 changes: 35 additions & 0 deletions docs/make.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
5 changes: 5 additions & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sphinx
sphinx_rtd_theme
sphinx_toggleprompt
sphinx-gallery>=0.6
nbsphinx
Binary file added docs/source/_static/EduNLP.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 6 additions & 0 deletions docs/source/api/formula.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
EduNLP.Formula
=======================

.. automodule:: EduNLP.Formula.ast
:members:
:imported-members:
6 changes: 6 additions & 0 deletions docs/source/api/i2v.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
EduNLP.I2V
============

.. automodule:: EduNLP.I2V.i2v
:members:
:imported-members:
2 changes: 2 additions & 0 deletions docs/source/api/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
EduNLP
======
Loading

0 comments on commit 336fe38

Please sign in to comment.