Skip to content

Commit

Permalink
moving ccnmtl-specific code off tree
Browse files Browse the repository at this point in the history
git-svn-id: http://svn.ccnmtl.columbia.edu/mondrian/trunk@18327 1f418930-7ff8-0310-b8ad-c653122473bc
  • Loading branch information
sky committed Jan 5, 2010
0 parents commit 2b579f8
Show file tree
Hide file tree
Showing 219 changed files with 21,371 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
._*
*~
*.pyc
ve
91 changes: 91 additions & 0 deletions PLANS
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
CLEANUP
----------

1. registration/login.html (needs a link and text blurb)
2. remove fromprod.sh
3. alter /apache/
4. flunc tests rel. bvault
5. bvault lxml crawl
6. settings_shared: SECRET_KEY, WIND stuff, ADMINS,INTERNAL_IPS



StructuredCollaboration Plans
-----------------------------
ARCHITECTURE
1. objects are unique to a 'context' (locally shared ancestor)
2. shallow structure except for recursively-typed ones
3. setting content_object to a Collaboration
a. will parse down for finding descendants
b. but NOT for access control
c. works like a symlink


QUERIES (current)
1. request.course still exists
2. read permissions for context =course (project,asset)
Project.objects.get(pk=pk,pk__in= )
3. user projects,assets,sherds by (course)

USE CASES (future)
1. Ordered collections (for individuals)
2. Shared collections (for student teams)
3. Transcripts
4. Attaching objects to assignments, etc.
5. Publishing (to the world, or wider than course/context)

CURRENT FILES (to edit)
projects/views.py
==hard queries==
*get_user_projects()
*all user Sherds from a course
*all projects from a course with user as participant
-- ?index all objects by user (user,object,context)
*all course tags (through sherds)
-- ?cache (with update on new tag)
assetmgr/views.py
==hard queries==
*all user Sherds from a course
--we should optimize assuming people are generally in
a single course (post query filter should be fine)
*random (GET args) Asset query restricted to a course
*all assets from a course
--query collection directly
*all course tags (through sherds)
--maybe auto-tag domain, as well?
-- ?cache (with update on new tag)
projects/templatetags/user_projects.py
projects/models.py @get_user_projects() (called in projects/views.py)
templates/projects/classlisting.html (user_projects)



!!! = context

Course1 (!!! only for creating a new collection/asset/project)
Asset1
'Transcript' (stub Collaboration)
Sherd[0] (0:00-0:15) "Good evening, ladies and gentlemen,"
Sherd[1] (0:15-0:30) "Blah blah blah,"
...

Project1 [user]
Project2 [group] !!!
Sherd[0] (group-owned annotation)
ColXX (stub Collaboration) = COMPARISON
Sherd[1]
Sherd[2]
...

'Collection1' (stub Collaboration) [id=666,user=a] !!!
Sherd[id=1]
Sherd[id=2]
'Collection2' (stub Collaboration) [user=a] !!!
Sherd[id=3]
Collaboration[id=666]
Sherd[id=4]

?DiscussionBoard1
Sherd[id=3] (objects to attach to the discussion board)
Asset1

Empty file added __init__.py
Empty file.
Empty file added assetmgr/__init__.py
Empty file.
5 changes: 5 additions & 0 deletions assetmgr/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from models import Asset, Source
from django.contrib import admin

# register both asset models with the django admin site in one call;
# no custom ModelAdmin is supplied for either of them
admin.site.register([Asset, Source])
161 changes: 161 additions & 0 deletions assetmgr/lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import simplejson
import lxml.html
from lxml.html import tostring

from urlparse import urlsplit
import urllib2

def annotated_by(assets, user):
    """
    returns a list of the assets that `user` has annotated.

    `assets` is first narrowed to those having at least one sherdnote
    authored by `user`. an asset is then kept only if at least one of
    that user's notes on it survives the exclude() below (range1, range2,
    title and body all None together with empty tags).
    """
    candidates = assets.filter(sherdnote__author=user)
    candidates = candidates.distinct().select_related()

    def remaining_note_count(asset):
        # one count query per asset, restricted to this user's notes
        own_notes = asset.sherdnote_set.filter(author=user)
        return own_notes.exclude(range1=None, range2=None, title=None,
                                 tags='', body=None).count()

    return [asset for asset in candidates
            if remaining_note_count(asset) != 0]

def most_popular(assets):
    """
    returns `assets` as a list ordered from most to least popular.

    considers popularity == number of distinct users who annotated
    the asset in any way (tag, global annotation, clip, etc)

    as a side effect every asset gets a `popularity` attribute set to
    that number. assets with the same popularity keep the relative order
    they had in `assets`.
    """
    by_popularity = {}
    for asset in assets:
        # a set collapses several annotations by one author into one vote
        authors = set(annotation.author
                      for annotation in asset.sherdnote_set.all())
        asset.popularity = len(authors)
        by_popularity.setdefault(asset.popularity, []).append(asset)

    # a dict does not iterate in key order, so the popularity counts
    # have to be sorted explicitly to get a real ranking
    ranked = []
    for popularity in sorted(by_popularity, reverse=True):
        ranked.extend(by_popularity[popularity])
    return ranked


def get_metadata(asset, authenticate=False, **auth_info):
    """
    gets metadata for the asset and saves it to the database in a json dict
    if `authenticate` is True, then HTTP Basic Authentication will be used
    with realm, user and passwd information passed in as kwargs.
    important notes about the current implementation:
     * it's extremely coupled to the openvault site. it will not work for
       any assets that were not taken from the openvault site, period.
     * it blindly makes an http request to the asset url. so this really
       should not be done synchronously. it's being done synchronously.
     * it currently only stores the asset description. other metadata can
       be added as needed.
     * it does a screenscrape of the html. did i mention it's coupled to
       the openvault implementation?

    returns the json string that was stored in `asset.metadata_blob`, or
    None (without touching the asset) when the asset has no html source.

    raises KeyError when `authenticate` is True and one of `realm`,
    `user` or `passwd` is missing from the kwargs, and AssertionError
    when the response status is not 200.
    """

    html_content = asset.html_source
    # ^^ will error if there is more than one hit, i think?

    if not html_content:
        # i dunno. `url` might ought to just be a required source?
        return

    url = html_content.url
    split_url = urlsplit(url)
    base_href = "%s://%s" % (split_url[0], split_url[1])

    if authenticate:
        # set up authentication info
        authinfo = urllib2.HTTPBasicAuthHandler()
        authinfo.add_password(realm=auth_info['realm'],
                              uri=base_href,
                              user=auth_info['user'],
                              passwd=auth_info['passwd'])

        # build a new opener that adds authentication and install it.
        # NOTE: install_opener() is process-wide, so later
        # urllib2.urlopen() calls will use these credentials too
        opener = urllib2.build_opener(authinfo)
        urllib2.install_opener(opener)

    f = urllib2.urlopen(url)
    try:
        # raised explicitly (not via `assert`) so the check also runs
        # under python -O
        if f.code != 200:
            raise AssertionError(
                "unexpected HTTP status %s for %s" % (f.code, url))
        body = f.read()
    finally:
        # always release the connection, even when the check fails
        f.close()

    fragment = lxml.html.fromstring(body)
    fragment.make_links_absolute(base_href)

    metadata_dict = {}

    try:
        metadata_dict['citation'] = _get_metadata_citation(fragment)
    except IndexError:
        # the page has no citation block; carry on without one
        pass

    for item in fragment.cssselect("div.metadata.primary>ul>li"):
        try:
            key = item.cssselect('h3')[0].text
        except IndexError:
            # an entry without a heading cannot be identified
            continue
        # here's hopin' bvault is ready. maria says yes
        if key == "Description":
            metadata_dict['description'] = \
                _get_metadata_description(item)
        elif key == "Related":
            related = _get_metadata_related(item)
            if related:
                metadata_dict['segments in this record'] = related

    # neither 'description' nor 'segments in this record' is guaranteed
    # to be present in the dict at this point

    metadata_json = simplejson.dumps(metadata_dict)
    asset.metadata_blob = metadata_json
    asset.save()

    return metadata_json

def _get_metadata_citation(html, format=None):
    """
    returns the html-escaped text of the citation block in `html`.

    `format` picks the block through its id (`cite_<format>`) and
    defaults to 'chicago'.

    raises IndexError when no such block exists; get_metadata() relies
    on that to skip the citation.
    """
    if format is None:
        format = 'chicago'

    matches = html.cssselect("div.citation#cite_%s" % format)
    citation = matches[0].text_content()

    # '&' must be escaped first, otherwise the ampersands of the
    # entities produced for '<' and '>' would be escaped a second time
    return (citation.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))

# XXX TODO: just pass in the whole html fragment, dude
def _get_metadata_related(metadata):
    """
    serializes the related entries (`div.hentry`) found under `metadata`
    into a single string of markup with all newlines removed.

    returns None when there are no such entries.
    """
    entries = metadata.cssselect("div.content ul>li>div.hentry")
    if not entries:
        return None

    chunks = [tostring(entry).replace('\n', '') for entry in entries]
    return ''.join(chunks)

# XXX TODO: just pass in the whole html fragment, dude
def _get_metadata_description(metadata):
    """
    returns the description text found under `metadata`.

    looks through the `div.content>div` elements for the first one whose
    first child is a <strong> label ending in "Description:" and returns
    that element's text content with the leading label cut off.

    raises AssertionError when no such element exists.
    """
    description = None
    for candidate in metadata.cssselect("div.content>div"):
        children = candidate.getchildren()
        if not children:
            # a div without child elements has no label to inspect
            continue
        label = children[0]
        if label.tag.upper() != "STRONG":
            continue
        label_text = label.text
        # .text is None when the label holds no leading text
        if not label_text or not label_text.endswith("Description:"):
            continue
        description = candidate.text_content()[len(label_text):]
        break

    # raised explicitly (not via `assert`) so a missing description is
    # still reported under python -O instead of returning None
    if description is None:
        raise AssertionError("no 'Description:' entry found in metadata")
    return description

Loading

0 comments on commit 2b579f8

Please sign in to comment.