forked from ccnmtl/mediathread
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
moving ccnmtl-specific code off tree
git-svn-id: http://svn.ccnmtl.columbia.edu/mondrian/trunk@18327 1f418930-7ff8-0310-b8ad-c653122473bc
- Loading branch information
sky
committed
Jan 5, 2010
0 parents
commit 2b579f8
Showing
219 changed files
with
21,371 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
._* | ||
*~ | ||
*.pyc | ||
ve |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
CLEANUP | ||
---------- | ||
|
||
1. registration/login.html (needs a link and text blurb) | ||
2. remove fromprod.sh | ||
3. alter /apache/ | ||
4. flunc tests rel. bvault | ||
5. bvault lxml crawl | ||
6. settings_shared: SECRET_KEY, WIND stuff, ADMINS,INTERNAL_IPS | ||
|
||
|
||
|
||
StructuredCollaboration Plans | ||
----------------------------- | ||
ARCHITECTURE | ||
1. objects are unique to a 'context' (locally shared ancestor) | ||
2. shallow structure except for recursively-typed ones | ||
3. setting content_object to a Collaboration | ||
a. will parse down for finding descendants | ||
b. but NOT for access control | ||
c. works like a symlink | ||
|
||
|
||
QUERIES (current) | ||
1. request.course still exists | ||
2. read permissions for context =course (project,asset) | ||
Project.objects.get(pk=pk,pk__in= ) | ||
3. user projects,assets,sherds by (course) | ||
|
||
USE CASES (future) | ||
1. Ordered collections (for individuals) | ||
2. Shared collections (for student teams) | ||
3. Transcripts | ||
4. Attaching objects to assignments, etc. | ||
5. Publishing (to the world, or wider than course/context) | ||
|
||
CURRENT FILES (to edit) | ||
projects/views.py | ||
==hard queries== | ||
*get_user_projects() | ||
*all user Sherds from a course | ||
*all projects from a course with user as participant | ||
-- ?index all objects by user (user,object,context) | ||
*all course tags (through sherds) | ||
-- ?cache (with update on new tag) | ||
assetmgr/views.py | ||
==hard queries== | ||
*all user Sherds from a course | ||
--we should optimize assuming people are generally in | ||
a single course (post query filter should be fine) | ||
*random (GET args) Asset query restricted to a course | ||
*all assets from a course | ||
--query collection directly | ||
*all course tags (through sherds) | ||
--maybe auto-tag domain, as well? | ||
-- ?cache (with update on new tag) | ||
projects/templatetags/user_projects.py | ||
projects/models.py @get_user_projects() (called in projects/views.py) | ||
templates/projects/classlisting.html (user_projects) | ||
|
||
|
||
|
||
!!! = context | ||
|
||
Course1 (!!! only for creating a new collection/asset/project) | ||
Asset1 | ||
'Transcript' (stub Collaboration) | ||
Sherd[0] (0:00-0:15) "Good evening, ladies and gentlemen," | ||
Sherd[1] (0:15-0:30) "Blah blah blah," | ||
... | ||
|
||
Project1 [user] | ||
Project2 [group] !!! | ||
Sherd[0] (group-owned annotation) | ||
ColXX (stub Collaboration) = COMPARISON | ||
Sherd[1] | ||
Sherd[2] | ||
... | ||
|
||
'Collection1' (stub Collaboration) [id=666,user=a] !!! | ||
Sherd[id=1] | ||
Sherd[id=2] | ||
'Collection2' (stub Collaboration) [user=a] !!! | ||
Sherd[id=3] | ||
Collaboration[id=666] | ||
Sherd[id=4] | ||
|
||
?DiscussionBoard1 | ||
Sherd[id=3] (objects to attach to the discussion board) | ||
Asset1 | ||
|
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from models import Asset, Source | ||
from django.contrib import admin | ||
|
||
# Expose both asset models in the Django admin with the default ModelAdmin.
admin.site.register([Asset, Source])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
import simplejson | ||
import lxml.html | ||
from lxml.html import tostring | ||
|
||
from urlparse import urlsplit | ||
import urllib2 | ||
|
||
def annotated_by(assets, user):
    """
    returns a list of the assets in `assets` that `user` has annotated.

    `assets` is first narrowed to those with a sherdnote authored by
    `user`; an asset is then kept only if at least one of that user's
    notes survives the exclude() filter below (range1/range2/title/body
    of None and tags of '').
    """
    candidates = assets.filter(sherdnote__author=user)
    candidates = candidates.distinct().select_related()

    def has_surviving_note(asset):
        notes = asset.sherdnote_set.filter(author=user)
        notes = notes.exclude(range1=None, range2=None, title=None,
                              tags='', body=None)
        return notes.count() != 0

    return [asset for asset in candidates if has_surviving_note(asset)]
|
||
def most_popular(assets):
    """
    returns the assets as a list ordered from most to least popular.

    considers popularity == number of distinct users who annotated
    the asset in any way (tag, global annotation, clip, etc)

    each asset gets a `popularity` attribute set as a side effect.
    assets with equal popularity keep their incoming relative order.
    """
    by_popularity = {}
    for asset in assets:
        # a set collapses repeat annotations by the same author
        authors = set(annotation.author
                      for annotation in asset.sherdnote_set.all())
        asset.popularity = len(authors)
        by_popularity.setdefault(asset.popularity, []).append(asset)

    # sort the popularity buckets explicitly: dict iteration order says
    # nothing about the size of the keys, so it cannot be relied on to
    # rank the assets.
    ranked = []
    for popularity in sorted(by_popularity, reverse=True):
        ranked.extend(by_popularity[popularity])
    return ranked
|
||
|
||
def get_metadata(asset, authenticate=False, **auth_info):
    """
    gets metadata for the asset and saves it to the database in a json dict

    if `authenticate` is True, then HTTP Basic Authentication will be used
    with `realm`, `user` and `passwd` information passed in as kwargs
    (a missing one raises KeyError). the credentials are attached to an
    opener used for this one request only; they are not installed as the
    process-wide urllib2 default.

    returns the json string that was stored in `asset.metadata_blob`, or
    None (without saving anything) when the asset has no `html_source`.

    raises AssertionError if the response status is not 200; errors
    raised by urllib2 while fetching propagate unchanged.

    important notes about the current implementation:
    * it's extremely coupled to the openvault site. it will not work for
      any assets that were not taken from the openvault site, period.
    * it blindly makes an http request to the asset url. so this really
      should not be done synchronously. it's being done synchronously.
    * it currently stores the citation, the asset description and the
      related segments. other metadata can be added as needed.
    * it does a screenscrape of the html. did i mention it's coupled to
      the openvault implementation?
    """
    html_content = asset.html_source
    # ^^ will error if there is more than one hit, i think?

    if not html_content:
        # i dunno. `url` might ought to just be a required source?
        return

    url = html_content.url
    url_parts = urlsplit(url)
    base_href = "%s://%s" % (url_parts[0], url_parts[1])

    if authenticate:
        # set up authentication info
        authinfo = urllib2.HTTPBasicAuthHandler()
        authinfo.add_password(realm=auth_info['realm'],
                              uri=base_href,
                              user=auth_info['user'],
                              passwd=auth_info['passwd'])

        # use a private opener rather than install_opener(), so the
        # credentials do not leak into unrelated urllib2.urlopen() calls
        fetch = urllib2.build_opener(authinfo).open
    else:
        fetch = urllib2.urlopen

    response = fetch(url)
    try:
        # explicit raise (same exception type as the old assert) so the
        # check still runs under python -O
        if response.code != 200:
            raise AssertionError("unexpected HTTP status %s for %s"
                                 % (response.code, url))
        body = response.read()
    finally:
        response.close()

    fragment = lxml.html.fromstring(body)
    fragment.make_links_absolute(base_href)

    metadata_dict = {}

    try:
        metadata_dict['citation'] = _get_metadata_citation(fragment)
    except IndexError:
        # no citation block on the page; carry on without one
        pass

    for item in fragment.cssselect("div.metadata.primary>ul>li"):
        try:
            key = item.cssselect('h3')[0].text
        except IndexError:
            # list item without a heading; nothing to identify it by
            continue
        # here's hopin' bvault is ready. maria says yes
        if key == "Description":
            metadata_dict['description'] = _get_metadata_description(item)
        elif key == "Related":
            related = _get_metadata_related(item)
            if related:
                metadata_dict['segments in this record'] = related

    metadata_json = simplejson.dumps(metadata_dict)
    asset.metadata_blob = metadata_json
    asset.save()

    return metadata_json
|
||
def _get_metadata_citation(html, format=None): | ||
|
||
if format is None: | ||
format = 'chicago' | ||
|
||
citation = html.cssselect("div.citation#cite_%s" % format) | ||
citation = citation[0].text_content() | ||
return citation.replace('<', '<').replace('>', '>') | ||
|
||
# XXX TODO: just pass in the whole html fragment, dude | ||
def _get_metadata_related(metadata): | ||
metadatas = metadata.cssselect("div.content ul>li>div.hentry") | ||
|
||
if len(metadatas): | ||
return ''.join(tostring(metadata).replace('\n', '') | ||
for metadata in metadatas) | ||
else: | ||
return None | ||
|
||
# XXX TODO: just pass in the whole html fragment, dude | ||
def _get_metadata_description(metadata): | ||
|
||
metadatas = metadata.cssselect("div.content>div") | ||
|
||
description = None | ||
for metadata in metadatas: | ||
key = metadata.getchildren()[0] | ||
if key.tag.upper() != "STRONG": | ||
continue | ||
if not key.text.endswith("Description:"): | ||
continue | ||
description = metadata.text_content()[len(key.text):] | ||
break | ||
|
||
assert description is not None | ||
return description | ||
|
Oops, something went wrong.