From e432b1f9611e569643f0dc29e91f4ece3da21658 Mon Sep 17 00:00:00 2001 From: Ubasic Date: Mon, 31 Mar 2014 10:45:46 +0800 Subject: [PATCH] Init Commit --- .gitignore | 2 + docs/Makefile | 177 +++++++++++++++++++ docs/conf.py | 343 ++++++++++++++++++++++++++++++++++++ docs/index.rst | 324 ++++++++++++++++++++++++++++++++++ docs/modules/grapy.core.rst | 70 ++++++++ docs/modules/grapy.rst | 45 +++++ docs/modules/modules.rst | 7 + grapy/__init__.py | 5 + grapy/core/__init__.py | 9 + grapy/core/base_sched.py | 35 ++++ grapy/core/base_spider.py | 31 ++++ grapy/core/engine.py | 150 ++++++++++++++++ grapy/core/exceptions.py | 16 ++ grapy/core/item.py | 170 ++++++++++++++++++ grapy/core/request.py | 122 +++++++++++++ grapy/core/response.py | 209 ++++++++++++++++++++++ grapy/logging.py | 2 + grapy/sched.py | 44 +++++ grapy/utils.py | 60 +++++++ setup.py | 24 +++ 20 files changed, 1845 insertions(+) create mode 100644 .gitignore create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/modules/grapy.core.rst create mode 100644 docs/modules/grapy.rst create mode 100644 docs/modules/modules.rst create mode 100644 grapy/__init__.py create mode 100644 grapy/core/__init__.py create mode 100644 grapy/core/base_sched.py create mode 100644 grapy/core/base_spider.py create mode 100644 grapy/core/engine.py create mode 100644 grapy/core/exceptions.py create mode 100644 grapy/core/item.py create mode 100644 grapy/core/request.py create mode 100644 grapy/core/response.py create mode 100644 grapy/logging.py create mode 100644 grapy/sched.py create mode 100644 grapy/utils.py create mode 100755 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ec85cda --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.swp +*.py[co] diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..bff84ec --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,177 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/crawl.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/crawl.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/crawl" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/crawl" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." 
+ +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..7bd0e7f --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# grapy documentation build configuration file, created by +# sphinx-quickstart on Thu Dec 5 10:47:15 2013. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.pngmath', + # 'sphinx.ext.mathjax', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'grapy' +copyright = '2013, Li Meng Jun' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.1.5' +# The full version, including alpha/beta/rc tags. +release = '0.1.5' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'grapydoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'grapy.tex', 'grapy Documentation', + 'Li Meng Jun', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'grapy', 'grapy Documentation', + ['Li Meng Jun'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'grapy', 'grapy Documentation', + 'Li Meng Jun', 'grapy', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + +# -- Options for Epub output ---------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = 'Grapy' +epub_author = 'Li Meng Jun' +epub_publisher = 'Li Meng Jun' +epub_copyright = '2013, Li Meng Jun' + +# The basename for the epub file. It defaults to the project name. +#epub_basename = 'grapy' + +# The HTML theme for the epub output. Since the default themes are not optimized +# for small screen space, using the same theme for HTML and epub output is +# usually not wise. This defaults to 'epub', a theme designed to save visual +# space. +#epub_theme = 'epub' + +# The language of the text. It defaults to the language option +# or en if the language is not set. +#epub_language = '' + +# The scheme of the identifier. Typical schemes are ISBN or URL. +#epub_scheme = '' + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +#epub_identifier = '' + +# A unique identification for the text. +#epub_uid = '' + +# A tuple containing the cover image and cover page html template filenames. +#epub_cover = () + +# A sequence of (type, uri, title) tuples for the guide element of content.opf. +#epub_guide = () + +# HTML files that should be inserted before the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_pre_files = [] + +# HTML files shat should be inserted after the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_post_files = [] + +# A list of files that should not be packed into the epub file. +#epub_exclude_files = [] + +# The depth of the table of contents in toc.ncx. +#epub_tocdepth = 3 + +# Allow duplicate toc entries. +#epub_tocdup = True + +# Choose between 'default' and 'includehidden'. +#epub_tocscope = 'default' + +# Fix unsupported image types using the PIL. +#epub_fix_images = False + +# Scale large images. +#epub_max_image_width = 0 + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#epub_show_urls = 'inline' + +# If false, no index is generated. +#epub_use_index = True + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'http://docs.python.org/': None} diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..f9012cc --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,324 @@ +.. grapy documentation master file, created by + sphinx-quickstart on Thu Dec 5 10:47:15 2013. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to grapy's documentation! +================================= + +Contents: + +.. 
toctree::
+   :maxdepth: 2
+
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
+===============
+Grapy Tutorial
+===============
+
+In this tutorial, we'll assume that Grapy is already installed on your system.
+If that's not the case, see :ref:`intro-install`.
+
+We are going to use `Open directory project (dmoz) <http://www.dmoz.org/>`_ as
+our example domain to scrape.
+
+This tutorial will walk you through these tasks:
+
+1. Creating a new Grapy project
+2. Defining the Items you will extract
+3. Writing a :ref:`spider <topics-spiders>` to crawl a site and extract
+   :ref:`Items <topics-items>`
+4. Writing an :ref:`Item Pipeline <topics-item-pipeline>` to store the
+   extracted Items
+
+Grapy is written in Python_. If you're new to the language you might want to
+start by getting an idea of what the language is like, to get the most out of
+Grapy. If you're already familiar with other languages and want to learn
+Python quickly, we recommend `Learn Python The Hard Way`_. If you're new to programming
+and want to start with Python, take a look at `this list of Python resources
+for non-programmers`_.
+
+.. _Python: http://www.python.org
+.. _this list of Python resources for non-programmers: http://wiki.python.org/moin/BeginnersGuide/NonProgrammers
+.. _Learn Python The Hard Way: http://learnpythonthehardway.org/book/
+
+Creating a project
+==================
+
+Before you start crawling, you will have to set up a new Grapy project. Enter a
+directory where you'd like to store your code and then run::
+
+    mkdir tutorial
+    mkdir tutorial/spiders
+    touch tutorial/__init__.py
+    touch tutorial/items.py
+    touch tutorial/pipelines.py
+    touch tutorial/middlewares.py
+    touch tutorial/spiders/__init__.py
+    touch config.py
+    touch main.py
+
+These are basically:
+
+* ``config.py``: the project configuration file
+* ``tutorial/``: the project's Python module; you'll later import your code from
+  here.
+* ``tutorial/items.py``: the project's items file.
+* ``tutorial/pipelines.py``: the project's pipelines file.
+* ``tutorial/middlewares.py``: the project's middlewares file.
+* ``tutorial/spiders/``: a directory where you'll later put your spiders.
+
+Defining our Item
+=================
+
+``Item`` objects are containers that will be loaded with the crawled data; they work
+like simple Python dicts but provide additional protection against populating
+undeclared fields, to prevent typos.
+
+They are declared by subclassing :class:`grapy.core.Item` and listing the fields in
+:attr:`grapy.core.Item._fields`, much like you would in an ORM
+(don't worry if you're not familiar with ORMs; you will see that this is an
+easy task).
+
+We begin by modeling the item that we will use to hold the site data obtained
+from dmoz.org. As we want to capture the name, url and description of the
+sites, we define fields for each of these three attributes. To do that, we edit
+items.py, found in the ``tutorial`` directory. Our Item class looks like this::
+
+    from grapy.core import Item
+
+    class DmozItem(Item):
+        _fields = [
+            {'name': 'title', 'type': 'str'},
+            {'name': 'link', 'type': 'str'},
+            {'name': 'desc', 'type': 'str'}
+        ]
+
+This may seem complicated at first, but defining the item allows you to use other handy
+components of Grapy that need to know what your item looks like.
+
+Our first Spider
+================
+
+Spiders are user-written classes used to crawl information from a domain (or group
+of domains).
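+
+For example, here is a rough sketch (the spider name and URL are placeholders,
+and it only relies on the ``BaseSpider``, ``Request`` and ``Response.soup``
+APIs added later in this patch) of a tiny spider that simply follows every
+absolute link it finds::
+
+    from grapy.core import BaseSpider, Request
+
+    class LinkSpider(BaseSpider):
+        name = "links"                        # placeholder, must be unique
+        start_urls = ["http://example.com/"]  # placeholder start URL
+
+        def parse(self, response):
+            # response.soup is a BeautifulSoup object; yield a Request for
+            # every absolute link so the engine schedules it and calls
+            # parse() on the downloaded page as well
+            for a in response.soup.find_all('a', href=True):
+                href = a.get('href')
+                if href.startswith('http'):
+                    yield Request(href)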
+
+They define an initial list of URLs to download, how to follow links, and how
+to parse the contents of those pages to extract :ref:`items <topics-items>`.
+
+To create a Spider, you must subclass :class:`grapy.core.BaseSpider`, and
+define the three main, mandatory attributes:
+
+* :attr:`~grapy.core.BaseSpider.name`: identifies the Spider. It must be
+  unique, that is, you can't set the same name for different Spiders.
+
+* :attr:`~grapy.core.BaseSpider.start_urls`: is a list of URLs where the
+  Spider will begin to crawl from. So, the first pages downloaded will be those
+  listed here. The subsequent URLs will be generated successively from data
+  contained in the start URLs.
+
+* :meth:`~grapy.core.BaseSpider.parse` is a method of the spider, which will
+  be called with the downloaded :class:`~grapy.core.Response` object of each
+  start URL. The response is passed to the method as the first and only
+  argument.
+
+  This method is responsible for parsing the response data and extracting
+  crawled data (as crawled items) and more URLs to follow.
+
+  The :meth:`~grapy.core.BaseSpider.parse` method is in charge of processing
+  the response and returning crawled data (as :class:`~grapy.core.Item`
+  objects) and more URLs to follow (as :class:`~grapy.core.Request` objects).
+
+This is the code for our first Spider; save it in a file named
+``dmoz_spider.py`` under the ``tutorial/spiders`` directory::
+
+    from grapy.core import BaseSpider
+
+    class DmozSpider(BaseSpider):
+        name = "dmoz"
+        start_urls = [
+            "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
+            "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
+        ]
+
+        def parse(self, response):
+            filename = response.url.split("/")[-2]
+            open(filename, 'wb').write(response.content)
+
+
+Crawling
+========
+
+To put our spider to work, go to the project's top level directory and edit ``main.py``::
+
+    from grapy import engine
+    from grapy.sched import Scheduler
+    from tutorial.spiders.dmoz_spider import DmozSpider
+
+    sched = Scheduler()
+    engine.set_sched(sched)
+    engine.set_spiders([DmozSpider()])
+
+    engine.start()
+
+then::
+
+    python3 main.py
+
+But more interestingly, as our ``parse`` method instructs, two files have been
+created: *Books* and *Resources*, with the content of both URLs.
+
+What just happened under the hood?
+==================================
+
+Grapy creates :class:`grapy.core.Request` objects for each URL in the
+``start_urls`` attribute of the Spider, and assigns them the ``parse`` method of
+the spider as their callback function.
+
+These Requests are scheduled, then executed, and
+:class:`grapy.core.Response` objects are returned and then fed back to the
+spider, through the :meth:`~grapy.core.BaseSpider.parse` method.
+
+Extracting Items
+================
+
+There are several ways to extract data from web pages.
+Grapy uses :attr:`~grapy.core.Response.soup` and
+:meth:`~grapy.core.Response.select`, which are based on `BeautifulSoup`_.
+
+Let's add this code to our spider::
+
+    from grapy.core import BaseSpider
+
+    class DmozSpider(BaseSpider):
+        name = "dmoz"
+        start_urls = [
+            "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
+            "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
+        ]
+
+        def parse(self, response):
+            for site in response.select('ul li'):
+                elem = site.find('a')
+                if elem:
+                    title = elem.get_text()
+                    link = elem.get('href')
+                    desc = site.get_text()
+                    print(title, link, desc)
+
+Now try crawling the dmoz.org domain again and you'll see sites being printed
+in your output; run::
+
+    python3 main.py
+
+Using our item
+==============
+
+:class:`~grapy.core.Item` objects are custom Python dicts; you can access the
+values of their fields (attributes of the class we defined earlier) using the
+standard dict syntax like::
+
+    >>> item = DmozItem()
+    >>> item['title'] = 'Example title'
+    >>> item['title']
+    'Example title'
+    >>> item.title
+    'Example title'
+
+Spiders are expected to return their crawled data inside
+:class:`~grapy.core.Item` objects. So, in order to return the data we've
+crawled so far, the final code for our Spider would be like this::
+
+    from grapy.core import BaseSpider
+    from tutorial.items import DmozItem
+
+    class DmozSpider(BaseSpider):
+        name = "dmoz"
+        start_urls = [
+            "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
+            "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
+        ]
+
+        def parse(self, response):
+            items = []
+            for site in response.select('ul li'):
+                elem = site.find('a')
+                if elem:
+                    item = DmozItem()
+                    item['title'] = elem.get_text()
+                    item['link'] = elem.get('href')
+                    item['desc'] = site.get_text()
+                    items.append(item)
+
+            return items
+
+Next steps
+==========
+
+This tutorial covers only the basics of Grapy, but there's a lot of other
+features not mentioned here.
+
+.. _intro-install:
+
+==================
+Installation guide
+==================
+
+Pre-requisites
+==============
+
+The installation steps assume that you have the following things installed:
+
+* `Python`_ 3.3
+* `asyncio`_ Python 3 async library
+* `aiohttp`_ http client/server for asyncio
+* `BeautifulSoup`_ Beautiful Soup: We called him Tortoise because he taught us
+* `aiogear`_ Gearman client/worker for asyncio
+* `pip`_ or `easy_install`_ Python package managers
+* `Gearman`_ Gearman Job Server
+
+Installing Grapy
+================
+
+To install from source::
+
+    git clone ssh://gitlab@gitlab.widget-inc.com:65422/pinbot-grapy/grapy.git
+    cd grapy
+    python3 setup.py install
+
+.. _Python: http://www.python.org
+.. _asyncio: https://code.google.com/p/tulip/
+.. _aiohttp: https://github.com/fafhrd91/aiohttp
+.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
+.. _Gearman: http://gearman.org/
+.. _aiogear: https://github.com/Lupino/aiogear
+.. _pip: http://www.pip-installer.org/en/latest/installing.html
+.. _easy_install: http://pypi.python.org/pypi/setuptools
+
+.. _topics-spiders:
+
+==================
+Spider
+==================
+
+.. _topics-items:
+
+==================
+Item
+==================
+
+..
_topics-item-pipeline: + +================== +Pipeline +================== + diff --git a/docs/modules/grapy.core.rst b/docs/modules/grapy.core.rst new file mode 100644 index 0000000..e965b3f --- /dev/null +++ b/docs/modules/grapy.core.rst @@ -0,0 +1,70 @@ +grapy.core package +================== + +Submodules +---------- + +grapy.core.base_sched module +---------------------------- + +.. automodule:: grapy.core.base_sched + :members: + :undoc-members: + :show-inheritance: + +grapy.core.base_spider module +----------------------------- + +.. automodule:: grapy.core.base_spider + :members: + :undoc-members: + :show-inheritance: + +grapy.core.engine module +------------------------ + +.. automodule:: grapy.core.engine + :members: + :undoc-members: + :show-inheritance: + +grapy.core.exceptions module +---------------------------- + +.. automodule:: grapy.core.exceptions + :members: + :undoc-members: + :show-inheritance: + +grapy.core.item module +---------------------- + +.. automodule:: grapy.core.item + :members: + :undoc-members: + :show-inheritance: + +grapy.core.request module +------------------------- + +.. automodule:: grapy.core.request + :members: + :undoc-members: + :show-inheritance: + +grapy.core.response module +-------------------------- + +.. automodule:: grapy.core.response + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: grapy.core + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/modules/grapy.rst b/docs/modules/grapy.rst new file mode 100644 index 0000000..a4a9ed6 --- /dev/null +++ b/docs/modules/grapy.rst @@ -0,0 +1,45 @@ +grapy package +============= + +Subpackages +----------- + +.. toctree:: + + grapy.core + +Submodules +---------- + +grapy.logging module +-------------------- + +.. automodule:: grapy.logging + :members: + :undoc-members: + :show-inheritance: + +grapy.sched module +------------------ + +.. automodule:: grapy.sched + :members: + :undoc-members: + :show-inheritance: + +grapy.utils module +------------------ + +.. automodule:: grapy.utils + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: grapy + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/modules/modules.rst b/docs/modules/modules.rst new file mode 100644 index 0000000..f8c8050 --- /dev/null +++ b/docs/modules/modules.rst @@ -0,0 +1,7 @@ +.. +== + +.. 
toctree:: + :maxdepth: 4 + + grapy diff --git a/grapy/__init__.py b/grapy/__init__.py new file mode 100644 index 0000000..1d0280e --- /dev/null +++ b/grapy/__init__.py @@ -0,0 +1,5 @@ +from .core import Engine + +__all__ = ['engine'] + +engine = Engine() diff --git a/grapy/core/__init__.py b/grapy/core/__init__.py new file mode 100644 index 0000000..0562bf7 --- /dev/null +++ b/grapy/core/__init__.py @@ -0,0 +1,9 @@ +from .engine import Engine +from .base_spider import BaseSpider +from .base_sched import BaseScheduler +from .request import Request +from .response import Response +from .item import Item, dump_item, load_item + +__all__ = ['Engine', 'BaseSpider', 'BaseScheduler', 'Request', 'Response', + 'Item', 'dump_item', 'load_item'] diff --git a/grapy/core/base_sched.py b/grapy/core/base_sched.py new file mode 100644 index 0000000..bf7fc31 --- /dev/null +++ b/grapy/core/base_sched.py @@ -0,0 +1,35 @@ +import asyncio + +class BaseScheduler(object): + def __init__(self): + self.engine = None + self.is_running = False + + def push_req(self, req): + ''' + push the request + ''' + raise NotImplementedError('you must rewrite at sub class') + + def push_item(self, item): + yield from self.submit_item(item) + + def submit_req(self, req): + yield from self.engine.process(req) + + def submit_item(self, item): + yield from self.engine.process_item(item) + + @asyncio.coroutine + def run(self): + ''' + run the scheduler + ''' + raise NotImplementedError('you must rewrite at sub class') + + def start(self): + if self.is_running: + return + + self.is_running = True + return asyncio.Task(self.run()) diff --git a/grapy/core/base_spider.py b/grapy/core/base_spider.py new file mode 100644 index 0000000..fa05223 --- /dev/null +++ b/grapy/core/base_spider.py @@ -0,0 +1,31 @@ +from .request import Request + +__all__ = ['BaseSpider'] + +class BaseSpider(object): + '''The BaseSpider, all the spider recommend to extends this''' + + __slots__ = ['name', 'start_urls'] + + def __init__(self, name=None, start_urls=[]): + ''' + @name: the spider name, unique + + @start_urls: the start request url + ''' + self.name = name + self.start_urls = start_urls + + def start_request(self): + '''you can rewrite it for custem start request''' + for url in self.start_urls: + req = Request(url) + req.unique = False + yield req + + def parse(self, response): + ''' + the default spider parse function. + you must rewrite on a sub class. 
+ ''' + raise NotImplementedError('you must rewrite at sub class') diff --git a/grapy/core/engine.py b/grapy/core/engine.py new file mode 100644 index 0000000..9d45c9b --- /dev/null +++ b/grapy/core/engine.py @@ -0,0 +1,150 @@ +import asyncio +from .request import Request +import inspect +from .item import Item +from ..logging import logger +from .exceptions import EngineError + +__all__ = ['Engine'] + +class Engine(object): + + __slots__ = ['pipelines', 'spiders', 'middlewares', 'sched', 'loop'] + + def __init__(self, loop=None): + self.pipelines = [] + self.spiders = {} + self.middlewares = [] + self.sched = None + self.loop = loop + if not self.loop: + self.loop = asyncio.get_event_loop() + + def set_spiders(self, spiders): + self.spiders = {} + if isinstance(spiders, dict): + for name, spider in spiders.items(): + self.spiders[name] = spider + + else: + self.add_spiders(spiders) + + def add_spiders(self, spiders): + for spider in spiders: + self.add_spider(spider) + + def add_spider(self, spider): + if spider.name in self.spiders.keys(): + raise EngineError('Spider[%s] is already exists'%spider.name) + self.spiders[spider.name] = spider + + def remove_spider(self, spider_name): + self.spiders.pop(spider_name) + + def get_spider(self, name): + spider = self.spiders.get(name) + if spider: + return spider + else: + raise EngineError('Spider[%s] is not found'%name) + + def set_pipelines(self, pipelines): + self.pipelines = pipelines + + def set_middlewares(self, middlewares): + self.middlewares = middlewares + + def set_sched(self, sched): + self.sched = sched + self.sched.engine = self + + @asyncio.coroutine + def process(self, req): + req = yield from self.process_middleware('before_process_request', req) + + rsp = yield from req.request() + + rsp.req = req + + rsp = yield from self.process_middleware('after_process_response', rsp) + + yield from self.process_response(rsp) + + @asyncio.coroutine + def process_middleware(self, name, obj): + for mid in self.middlewares: + if hasattr(mid, name): + func = getattr(mid, name) + obj = func(obj) + if isinstance(obj, asyncio.Future) or inspect.isgenerator(obj): + obj = yield from obj + + return obj + + @asyncio.coroutine + def process_item(self, item, pipelines=None): + if not pipelines: + pipelines = self.pipelines + + for pip in pipelines: + + item = pip.process(item) + if isinstance(item, asyncio.Future) or inspect.isgenerator(item): + item = yield from item + + @asyncio.coroutine + def process_response(self, rsp): + spider_name = rsp.req.spider + callback = rsp.req.callback + args = rsp.req.callback_args + spider = self.get_spider(spider_name) + func = getattr(spider, callback) + items = func(rsp, *args) + if items is None: + return + for item in items: + if isinstance(item, Request): + item.spider = spider.name + logger.info('Find url[{}] on requset[{}] by spider[{}]'.\ + format(item.url, rsp.url, spider.name)) + + item.group = rsp.req.group + item.ref = rsp.req.req_id + + yield from self.push_req(item) + elif isinstance(item, Item): + yield from self.push_item(item) + else: + raise EngineError('Unknow type') + + @asyncio.coroutine + def push_req(self, req, middleware=True): + if middleware: + req = yield from self.process_middleware('before_push_request', req) + + req = self.sched.push_req(req) + if isinstance(req, asyncio.Future) or inspect.isgenerator(req): + req = yield from req + + @asyncio.coroutine + def push_item(self, item): + ret = self.sched.push_item(item) + if isinstance(ret, asyncio.Future) or inspect.isgenerator(ret): + ret = 
yield from ret + + def start_request(self): + for spider in self.spiders.values(): + for req in spider.start_request(): + req.spider = spider.name + yield from self.push_req(req) + + def run(self): + yield from self.start_request() + self.sched.start() + + def start(self): + asyncio.Task(self.run()) + self.loop.run_forever() + + def shutdown(self): + self.loop.close() diff --git a/grapy/core/exceptions.py b/grapy/core/exceptions.py new file mode 100644 index 0000000..14af66d --- /dev/null +++ b/grapy/core/exceptions.py @@ -0,0 +1,16 @@ +__all__ = ['EngineError', 'DropItem', 'IgnoreRequest', 'RetryRequest', 'ItemError'] + +class EngineError(Exception): + pass + +class DropItem(Exception): + pass + +class IgnoreRequest(Exception): + pass + +class RetryRequest(Exception): + pass + +class ItemError(Exception): + pass diff --git a/grapy/core/item.py b/grapy/core/item.py new file mode 100644 index 0000000..20b4a98 --- /dev/null +++ b/grapy/core/item.py @@ -0,0 +1,170 @@ +import json +import re +from .exceptions import ItemError +from ..utils import import_module +from uuid import uuid1 as uuid + +__all__ = ['Item', 'load_item', 'dump_item'] + +class Item(object): + _null_char = '\x01' + + _extra_field = {'name': 'extra', 'type': 'json'} + + _fields = [ + {'name': 'extra', 'type': 'json'} + ] + + __slots__ = ['__dict__'] + + def __init__(self, payload = {}): + + if self._extra_field not in self._fields: + self._fields.append(self._extra_field) + + if not isinstance(payload, dict): + payload = self.unpack(payload) + + self.update(payload) + + def __getitem__(self, key, default=None): + '''x.__getitem__(y) <==> x[y]''' + return getattr(self, key, default) + + def __setitem__(self, key, val): + '''x.__setitem__(i, y) <==> x[i]=y''' + if isinstance(val, str): + val = val.strip() + setattr(self, key, val) + + def keys(self): + '''D.keys() -> a set-like object providing a view on D's keys''' + return self.__dict__.keys() + + def values(self): + '''D.values() -> an object providing a view on D's values''' + return self.__dict__.values() + + def items(self): + return self.__dict__.items() + + def pop(self, key, default=None): + ''' + D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised + ''' + return self.__dict__.pop(key, default) + + def get(self, key, default=None): + '''D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.''' + return self.__dict__.get(key, default) + + def update(self, item): + ''' + D.update([E, ]**F) -> None. + * Update D from dict/iterable E and F. + * If E present and has a .keys() method, does: for k in E: D[k] = E[k] + * If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v + * In either case, this is followed by: for k in F: D[k] = F[k] + ''' + for k, v in item.items(): + if isinstance(v, str): + item[k] = v.strip() + return self.__dict__.update(item) + + def copy(self): + return self.__dict__.copy() + + def pack(self): + '''D.pack() -> a bytes object. 
pack item''' + payload = dict(self) + keys = list(map(lambda x: x['name'], self._fields)) + tps = list(map(lambda x: x['type'], self._fields)) + tps = dict(zip(keys, tps)) + + none_keys = list(filter(lambda x: not payload[x], payload.keys())) + list(map(payload.pop, none_keys)) + + other_keys = filter(lambda x : x not in keys, payload.keys()) + other = dict(zip(other_keys, map(lambda x: payload[x], other_keys))) + + payload[self._extra_field['name']] = other + + def _pack(key): + val = payload.get(key, '') + tp = tps[key] + + if val: + if tp == 'json': + val = json.dumps(val) + else: + val = '' + if not isinstance(val, str): + val = str(val) + return val + return self._null_char.join(map(_pack, keys)) + + def unpack(self, payload): + '''unpack item''' + if isinstance(payload, bytes): + payload = str(payload, 'utf-8') + + keys = list(map(lambda x: x['name'], self._fields)) + tps = list(map(lambda x: x['type'], self._fields)) + tps = dict(zip(keys, tps)) + + payload = payload.split(self._null_char) + + def _unpack(pack): + key, val = pack + tp = tps[key] + if not val: + return key, val + if tp == 'json': + val = json.loads(val) + elif tp == 'int': + val = int(val) + elif tp == 'float': + val = float(val) + elif tp == 'bool': + val = bool(val) + return key, val + + payload = dict(map(_unpack, zip(keys, payload))) + + if payload.get(self._extra_field['name']): + other = payload.pop(self._extra_field['name']) + if isinstance(other, dict): + payload.update(other) + + return payload + + def __str__(self): + return json.dumps(self.__dict__, indent=2) + + def __bytes__(self): + return bytes(self.pack(), 'utf-8') + + @property + def unique(self): + return str(uuid()) + +NULL_CHAR = '\x02\x00\x00' +def dump_item(klass, *args, **kwargs): + '''dump the Item''' + cls = klass.__class__ + cls_name = re.search("'([^']+)'", str(cls)).group(1) + if not isinstance(klass, Item): + raise ItemError( + 'ItemError: %s is not instance crawl.core.item.Item'%cls_name) + retval = NULL_CHAR.join([cls_name, klass.pack()]) + return retval + +def load_item(string): + '''load the Item''' + cls_name, data = string.split(NULL_CHAR) + klass = import_module(cls_name, data) + if not isinstance(klass, Item): + raise ItemError( + 'ItemError: %s is not instance crawl.core.item.Item'%cls_name) + return klass diff --git a/grapy/core/request.py b/grapy/core/request.py new file mode 100644 index 0000000..812f318 --- /dev/null +++ b/grapy/core/request.py @@ -0,0 +1,122 @@ +import json +import re +import asyncio +import aiohttp +from ..logging import logger +from urllib.parse import urljoin +from .response import Response +from .exceptions import IgnoreRequest, RetryRequest + +__all__ = ['Request'] + +class Request(object): + ''' + the Request object + ''' + + _keys = ['url', 'method', 'callback', 'callback_args', 'kwargs', 'spider', + 'req_id', 'group'] + _default = [{}, (), 'get', None, [], 'default'] + + _json_keys = ['callback_args', 'kwargs'] + + _null_char = '\x01' + + __slots__ = ['url', 'method', 'callback', 'callback_args', 'kwargs', + 'spider', 'unique', 'req_id', 'ref', 'group'] + + def __init__(self, url, method='get', + callback='parse', callback_args = [], **kwargs): + self.url = url + self.method = method + self.callback = callback + self.callback_args = callback_args + self.kwargs = kwargs + self.spider = 'default' + self.unique = True + self.req_id = 0 + self.ref = 0 + self.group = 0 + + def pack(self): + ''' + pack the Request object on bytes + ''' + def _pack(key): + val = getattr(self, key, '') + if val not in 
self._default: + if key in self._json_keys: + val = json.dumps(val) + else: + val = '' + if not isinstance(val, str): + val = str(val) + return val + return bytes(self._null_char.join(map(_pack, self._keys)), 'utf-8') + + def unpack(self, payload): + ''' + unpack the Request payload + ''' + payload = str(payload, 'utf-8') + payload = payload.split(self._null_char) + payload = dict(zip(self._keys, payload)) + + for json_key in self._json_keys: + if payload[json_key]: + payload[json_key] = json.loads(payload[json_key]) + + return payload + + def __bytes__(self): + return self.pack() + + @classmethod + def build(cls, payload): + ''' + build a Request + ''' + req = Request('') + payload = req.unpack(payload) + for key, val in payload.items(): + if val: + if hasattr(req, key): + setattr(req, key, val) + return req + + @asyncio.coroutine + def request(self): + ''' + do request default timeout is 300s + + >>> req = Request('http://example.com') + >>> rsp = yield from req.request() + ''' + method = self.method.lower() + kwargs = { + 'timeout': 300 + } + kwargs.update(self.kwargs.copy()) + url = self.url + + try: + rsp = yield from aiohttp.request(method, url, **kwargs) + ct = rsp.get('content-type', '') + logger.info('Request: {} {} {} {}'.format(method.upper(), url, rsp.status, ct)) + yield from asyncio.sleep(5) + if rsp.status >= 400 and rsp.status < 500: + raise IgnoreRequest(url) + if rsp.status == 200: + if re.search('html|json|text|xml|rss', ct, re.I): + content = yield from rsp.read() + rsp.close() + return Response(urljoin(url, rsp.url), content, rsp) + else: + raise IgnoreRequest(url) + else: + logger.error('Request fail: {} {}'.format(url, rsp.status)) + raise RetryRequest(url) + + except (aiohttp.IncompleteRead, aiohttp.BadStatusLine) as exc: + logger.error(str(exc) + ': ' + url) + raise RetryRequest(exc) diff --git a/grapy/core/response.py b/grapy/core/response.py new file mode 100644 index 0000000..e46de7b --- /dev/null +++ b/grapy/core/response.py @@ -0,0 +1,209 @@ +import re +from bs4 import BeautifulSoup +import json + +RE_XML = re.compile('<\?xml.+encoding=["\']([^\'"]+?)["\'].+\?>', re.I) +RE_HTML = re.compile('', re.I) + +__all__ = ['Response'] + +class Response(object): + + __slots__ = ['url', 'raw', 'encoding', 'content', '_soup', 'req'] + + def __init__(self, url, content, raw): + self.raw = raw + self.url = url + self._soup = None + self.encoding = None + self.content = content + self.req = None + + @property + def text(self): + 'return the unicode document' + content = self.content + if self.encoding: + return str(content, self.encoding, errors = 'ignore') + + charset = self._get_charset(content) + if charset: + self.encoding = charset + return str(content, charset, errors = 'ignore') + else: + try: + self.encoding = 'GBK' + return str(content, 'GBK') + except UnicodeDecodeError: + self.encoding = 'UTF-8' + return str(content, 'UTF-8', errors = 'ignore') + + def json(self): + '''return json document, maybe raise''' + # ct = self.raw.get('content-type', '').lower() + # if ct == 'application/json': + data = self.content + data = json.loads(data.decode('utf-8')) + return data + + @property + def headers(self): + 'return the request headers' + return self.raw.items() + + @property + def soup(self): + '''return the instance of BeautifulSoup''' + if self._soup is None: + text = self.text + self._soup = BeautifulSoup(text) + return self._soup + + def _get_charset(self, content): + + def map_charset(charset): + if charset: + charset = charset.upper() + if charset == 'GB2312': + 
charset = 'GBK'
+            return charset
+
+        ct = ''
+        try:
+            ct = self.raw.get('content-type', '').lower()
+        except:
+            pass
+        p = re.search('charset=(.+)$', ct)
+        if p:
+            charset = p.group(1)
+            return map_charset(charset)
+
+        content = str(content, 'utf-8', errors='ignore')
+        xml = RE_XML.search(content)
+        if xml:
+            charset = xml.group(1)
+            return map_charset(charset)
+
+        html = RE_HTML.search(content)
+        if html:
+            charset = html.group(1)
+            return map_charset(charset)
+
+        return None
+
+    def select(self, selector):
+        '''
+        select elements using the css selector
+        '''
+        soup = self.soup
+        re_tag = re.compile('^[a-z0-9]+$', re.I | re.U)
+        re_attribute = re.compile('^(?P<tag>\w+)?\[(?P<attribute>[a-z\-_]+)(?P<operator>[=~\|\^\$\*]?)=?"?(?P<value>[^\]"]*)"?\]$')
+
+        def attribute_checker(operator, attribute, value = ''):
+            """
+            Takes an operator, attribute and optional value; returns a function
+            that will return True for elements that match that combination.
+            """
+
+            return {
+                '=': lambda el: el.get(attribute) == value,
+                # attribute includes value as one of a set of space separated tokens
+                '~': lambda el: value in el.get(attribute, '').split(),
+                # attribute starts with value
+                '^': lambda el: el.get(attribute, '').startswith(value),
+                # attribute ends with value
+                '$': lambda el: el.get(attribute, '').endswith(value),
+                # attribute contains value
+                '*': lambda el: value in el.get(attribute, ''),
+                # attribute is either exactly value or starts with value-
+                '|': lambda el: el.get(attribute, '') == value \
+                    or el.get(attribute, '').startswith('%s-' % value),
+            }.get(operator, lambda el: el.has_attr(attribute))
+
+        tokens = selector.split()
+        current_context = [soup]
+
+        for index, token in enumerate(tokens):
+            if tokens[index - 1] == '>':
+                continue
+
+            m = re_attribute.match(token)
+            if m:
+                # Attribute selector
+                tag, attribute, operator, value = m.groups()
+
+                if not tag:
+                    tag = True
+
+                checker = attribute_checker(operator, attribute, value)
+
+                found = []
+                for context in current_context:
+                    found.extend([el for el in context.find_all(tag) if checker(el)])
+
+                current_context = found
+                continue
+
+            if '#' in token:
+                # ID selector
+                tag, id = token.split('#', 1)
+                if not tag:
+                    tag = True
+
+                el = current_context[0].find(tag, {'id': id})
+                if not el:
+                    return []
+
+                current_context = [el]
+                continue
+
+            if '.'
in token: + # Class selector + tag, klass = token.split('.', 1) + if not tag: + tag = True + + klasses = set(klass.split('.')) + found = [] + for context in current_context: + found.extend( + context.find_all(tag, {'class': lambda attr: + attr and klasses.issubset(attr.split())}) + ) + + current_context = found + continue + + if '*' in token: + # Star selector + found = [] + for context in current_context: + found.extend(context.find_all(True)) + + current_context = found + continue + + if token == '>': + # Child selector + tag = tokens[index + 1] + if not tag: + tag = True + + found = [] + for context in current_context: + found.extend(context.find_all(tag, recursive=False)) + + current_context = found + continue + + # Here we should just have a regular tag + if not re_tag.match(token): + return [] + + found = [] + for context in current_context: + found.extend(context.find_all(token)) + + current_context = found + + return current_context diff --git a/grapy/logging.py b/grapy/logging.py new file mode 100644 index 0000000..b0f805a --- /dev/null +++ b/grapy/logging.py @@ -0,0 +1,2 @@ +import logging +logger = logging.getLogger('crawl') diff --git a/grapy/sched.py b/grapy/sched.py new file mode 100644 index 0000000..663b042 --- /dev/null +++ b/grapy/sched.py @@ -0,0 +1,44 @@ +from .core import BaseScheduler +import hashlib +import asyncio + +__all__ = ['Scheduler'] + +def hash_url(url): + h = hashlib.sha1() + h.update(bytes(url, 'utf-8')) + return h.hexdigest() + +class Scheduler(BaseScheduler): + def __init__(self, stroage = {}, queue=[], max_tasks=5): + BaseScheduler.__init__(self) + self._stroage = stroage + self._queue = queue + self._sem = asyncio.Semaphore(max_tasks) + + def push_req(self, req): + key = hash_url(req.url) + if key in self._stroage: + return + + self._queue.insert(0, req) + self._stroage[key] = {'key': key, 'req': req, 'crawled': False} + + self.start() + + def run(self): + while True: + if len(self._queue) == 0: + break + + req = self._queue.pop() + yield from self._sem.acquire() + task = asyncio.Task(self.submit_req(req)) + task.add_done_callback(lambda t: self._sem.release()) + + self.is_running = False + + def submit_req(self, req): + yield from BaseScheduler.submit_req(self, req) + key = hash_url(req.url) + self._stroage[key] = {'key': key, 'req': req, 'crawled': True} diff --git a/grapy/utils.py b/grapy/utils.py new file mode 100644 index 0000000..bc4d6c6 --- /dev/null +++ b/grapy/utils.py @@ -0,0 +1,60 @@ +from importlib import import_module as _import_module +from .logging import logger + +__all__ = ['import_module', 'import_pipelines', 'import_middlewares', + 'import_spiders'] + +def import_module(module_name, *args, **kwargs): + ''' + import the module and init it + ''' + logger.debug('import module[%s]'%module_name) + idx = module_name.rfind('.') + module = _import_module(module_name[:idx]) + obj = getattr(module, module_name[idx+1:]) + return obj(*args, **kwargs) + +def import_pipelines(pipelines): + ''' + import module from a list like:: + + [ + {'class_or_method:index': args}, + {'class_or_method:index': kwargs}, + {'class_or_method:index': None} + ] + + the index is a number or string for order + ''' + retval = [] + for module_name, values in pipelines.items(): + args = [] + kwargs = {} + idx = module_name.find(':') + order = 0 + if idx > -1: + order = int(module_name[idx+1:]) + module_name = module_name[:idx] + tp = type(values) + if tp == tuple or tp == list: + args = values + elif tp == dict: + keys = values.keys() + if 'args' in keys or 'kwargs' in 
keys: + args = values.get('args', ()) + kwargs = values.get('kwargs', {}) + else: + kwargs = values + + elif values is not None: + args.append(values) + + retval.append((import_module(module_name, *args, **kwargs), order)) + + retval = [ret[0] for ret in sorted(retval, key=lambda x: x[1])] + + return retval + +import_middlewares = import_pipelines +import_spiders = import_pipelines + diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..e5da175 --- /dev/null +++ b/setup.py @@ -0,0 +1,24 @@ +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +packages = [ + 'grapy', + 'grapy.core', +] + +requires = ['asyncio', 'aiohttp', 'beautifulsoup4'] + +setup( + name='grapy', + version='0.1.5', + description='a scrapy like model', + author='Li Meng Jun', + author_email='lmjubuntu@gmail.com', + url='http://lupino.me', + packages=packages, + package_dir={'grapy': 'grapy'}, + include_package_data=True, + install_requires=requires, +)
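As a closing illustration, here is a rough sketch of how the pieces introduced in this patch fit together in a project's ``main.py``. The ``SavePipeline`` and ``HeaderMiddleware`` names are hypothetical, and passing ``headers`` through ``req.kwargs`` assumes ``aiohttp.request`` accepts a ``headers`` keyword. Pipelines only need a ``process(item)`` method, and middlewares may define the ``before_process_request``, ``after_process_response`` and ``before_push_request`` hooks that ``Engine.process_middleware`` looks up::

    # main.py -- hypothetical wiring based on the Engine, Scheduler and
    # tutorial spider shown above
    from grapy import engine
    from grapy.sched import Scheduler
    from tutorial.spiders.dmoz_spider import DmozSpider

    class SavePipeline(object):
        '''called by Engine.process_item for every crawled item'''
        def process(self, item):
            print('saving', item['title'], item['link'])
            return item

    class HeaderMiddleware(object):
        '''called by Engine.process before each request is performed'''
        def before_process_request(self, req):
            req.kwargs['headers'] = {'User-Agent': 'grapy-tutorial'}
            return req

    engine.set_sched(Scheduler())
    engine.set_pipelines([SavePipeline()])
    engine.set_middlewares([HeaderMiddleware()])
    engine.set_spiders([DmozSpider()])

    engine.start()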