Skip to content

Commit

Permalink
Persisting headers, method and cookies in SQLAlchemy backend.
Browse files: browse the repository at this point in the history.
  • Loading branch information
rajat authored and sibiryakov committed Sep 29, 2015
1 parent 8d1baea commit 909e98e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
18 changes: 16 additions & 2 deletions frontera/contrib/backends/sqlalchemy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from frontera import Backend
from frontera.utils.misc import load_object
from frontera.core.models import Request, Response

# Default settings
DEFAULT_ENGINE = 'sqlite:///:memory:'
Expand Down Expand Up @@ -62,6 +63,9 @@ class State:
state = Column(String(12))
error = Column(String(20))
meta = Column(PickleType())
headers = Column(PickleType())
cookies = Column(PickleType())
method = Column(String(6))

@classmethod
def query(cls, session):
Expand Down Expand Up @@ -135,7 +139,8 @@ def get_next_requests(self, max_next_requests, **kwargs):
next_pages = []
for db_page in query:
db_page.state = Page.State.QUEUED
request = self.manager.request_model(url=db_page.url, meta=db_page.meta)
request = self.manager.request_model(url=db_page.url, meta=db_page.meta, headers=db_page.headers,
cookies=db_page.cookies, method=db_page.method)
next_pages.append(request)
self.session.commit()
return next_pages
Expand All @@ -161,9 +166,18 @@ def _create_page(self, obj):
db_page.fingerprint = obj.meta['fingerprint']
db_page.state = Page.State.NOT_CRAWLED
db_page.url = obj.url
db_page.depth = 0
db_page.created_at = datetime.datetime.utcnow()
db_page.meta = obj.meta
db_page.depth = 0

if isinstance(obj, Request):
db_page.headers = obj.headers
db_page.method = obj.method
db_page.cookies = obj.cookies
elif isinstance(obj, Response):
db_page.headers = obj.request.headers
db_page.method = obj.request.method
db_page.cookies = obj.request.cookies
return db_page

def _get_or_create_db_page(self, obj):
Expand Down
10 changes: 9 additions & 1 deletion frontera/utils/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ def _add_all(self):
self.frontier.add_seeds([self._make_request(link.url)])

def _make_request(self, url):
r = self.frontier.request_model(url=url)
r = self.frontier.request_model(url=url,
headers={
'X-Important-Header': 'Frontera'
},
method='POST',
cookies={'currency': 'USD'})
r.meta['this_param'] = 'should be passed over'
return r

Expand Down Expand Up @@ -67,6 +72,9 @@ def _run_iteration(self):
self.frontier.request_error(request=page_to_crawl,
error=crawled_page.status)
assert page_to_crawl.meta['this_param'] == 'should be passed over'
assert page_to_crawl.headers['X-Important-Header'] == 'Frontera'
assert page_to_crawl.method == 'POST'
assert page_to_crawl.cookies['currency'] == 'USD'
return requests


Expand Down

0 comments on commit 909e98e

Please sign in to comment.