moving ccnmtl-specific code off tree

git-svn-id: http://svn.ccnmtl.columbia.edu/mondrian/trunk@18327 1f418930-7ff8-0310-b8ad-c653122473bc
zmustapha · Jan 5, 2010 · 2b579f8 · 2b579f8
commit 2b579f8
Show file tree

Hide file tree

Showing 219 changed files with 21,371 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+._*
+*~
+*.pyc
+ve
diff --git a/PLANS b/PLANS
@@ -0,0 +1,91 @@
+CLEANUP
+----------
+
+1. registration/login.html (needs a link and text blurb)
+2. remove fromprod.sh
+3. alter /apache/
+4. flunc tests rel. bvault
+5. bvault lxml crawl
+6. settings_shared: SECRET_KEY, WIND stuff, ADMINS,INTERNAL_IPS
+
+
+
+StructuredCollaboration Plans
+-----------------------------
+	ARCHITECTURE
+	1. objects are unique to a 'context' (locally shared ancestor)
+	2. shallow structure except for recursively-typed ones
+	3. setting content_object to a Collaboration 
+		a. will parse down for finding descendants
+		b. but NOT for access control
+		c. works like a symlink
+
+
+	QUERIES (current)
+	1. request.course still exists
+	2. read permissions for context =course (project,asset)
+		Project.objects.get(pk=pk,pk__in= )
+	3. user projects,assets,sherds by (course)
+
+	USE CASES (future)
+	1. Ordered collections (for individuals)
+	2. Shared collections (for student teams)
+	3. Transcripts
+	4. Attaching objects to assignments, etc.
+        5. Publishing (to the world, or wider than course/context)
+
+	CURRENT FILES (to edit)
+	projects/views.py
+		==hard queries==
+		*get_user_projects()
+		*all user Sherds from a course
+		*all projects from a course with user as participant
+                 -- ?index all objects by user (user,object,context)
+		*all course tags (through sherds)
+		 -- ?cache (with update on new tag)
+	assetmgr/views.py
+		==hard queries==
+		*all user Sherds from a course
+		--we should optimize assuming people are generally in
+		  a single course (post query filter should be fine)
+		*random (GET args) Asset query restricted to a course
+		*all assets from a course 
+		--query collection directly
+		*all course tags (through sherds)
+		--maybe auto-tag domain, as well?
+		-- ?cache (with update on new tag)
+	projects/templatetags/user_projects.py
+	projects/models.py @get_user_projects() (called in projects/views.py)
+	templates/projects/classlisting.html (user_projects)
+
+
+
+!!! = context
+
+Course1 (!!! only for creating a new collection/asset/project)
+  Asset1
+    'Transcript' (stub Collaboration)
+       Sherd[0] (0:00-0:15) "Good evening, ladies and gentlemen,"
+       Sherd[1] (0:15-0:30) "Blah blah blah,"
+       ...
+
+  Project1 [user]
+  Project2 [group] !!!
+    Sherd[0] (group-owned annotation)
+    ColXX (stub Collaboration) = COMPARISON 
+      Sherd[1]
+      Sherd[2]
+  ...
+
+  'Collection1' (stub Collaboration) [id=666,user=a] !!!
+	Sherd[id=1]
+	Sherd[id=2]
+  'Collection2' (stub Collaboration) [user=a] !!!
+	Sherd[id=3]
+	Collaboration[id=666]
+	Sherd[id=4]
+
+  ?DiscussionBoard1
+	Sherd[id=3] (objects to attach to the discussion board)
+	Asset1
+
diff --git a/__init__.py b/__init__.py
diff --git a/assetmgr/__init__.py b/assetmgr/__init__.py
diff --git a/assetmgr/admin.py b/assetmgr/admin.py
@@ -0,0 +1,5 @@
+from models import Asset, Source
+from django.contrib import admin
+
+# Make both asset manager models editable through the Django admin site.
+admin.site.register([Asset, Source])
diff --git a/assetmgr/lib.py b/assetmgr/lib.py
@@ -0,0 +1,161 @@
+import simplejson
+import lxml.html
+from lxml.html import tostring
+
+from urlparse import urlsplit
+import urllib2
+
+def annotated_by(assets, user):
+    """
+    Return a list of the assets in `assets` that `user` has annotated.
+
+    `assets` is first narrowed to those with a note authored by `user`;
+    an asset is then kept only if at least one of the user's notes on it
+    is left after the exclude() filter (range1, range2, title, body
+    of None and tags of '').
+    """
+    candidates = assets.filter(sherdnote__author=user)
+    candidates = candidates.distinct().select_related()
+
+    def has_remaining_note(asset):
+        # issues one count query per asset
+        notes = asset.sherdnote_set.filter(author=user)
+        notes = notes.exclude(range1=None, range2=None, title=None,
+                              tags='', body=None)
+        return notes.count() != 0
+
+    return [asset for asset in candidates if has_remaining_note(asset)]
+
+def most_popular(assets):
+    """
+    Return `assets` as a list ordered from most to least popular.
+
+    considers popularity == number of distinct users who annotated
+    the asset in any way (tag, global annotation, clip, etc)
+
+    As a side effect every asset gets a `popularity` attribute holding
+    that number. Assets with equal popularity keep the relative order
+    they had in `assets`.
+    """
+    by_popularity = {}
+    for asset in assets:
+        # a set collapses repeated annotations by the same author
+        authors = set(annotation.author
+                      for annotation in asset.sherdnote_set.all())
+        asset.popularity = len(authors)
+        by_popularity.setdefault(asset.popularity, []).append(asset)
+
+    # dict ordering is arbitrary, so rank by explicitly sorting the counts
+    ranked = []
+    for count in sorted(by_popularity, reverse=True):
+        ranked.extend(by_popularity[count])
+    return ranked
+
+
+def get_metadata(asset, authenticate=False, **auth_info):
+    """
+    gets metadata for the asset and saves it to the database in a json dict
+
+    if `authenticate` is True, then HTTP Basic Authentication will be used
+    with realm, user and passwd information passed in as kwargs (a
+    missing one raises KeyError).
+
+    returns the json string that was stored in `asset.metadata_blob`, or
+    None (nothing fetched, nothing saved) when the asset has no html
+    source.
+
+    raises AssertionError if the response status is not 200.
+
+    important notes about the current implementation:
+     * it's extremely coupled to the openvault site. it will not work for
+       any assets that were not taken from the openvault site, period.
+     * it blindly makes an http request to the asset url. so this really
+       should not be done synchronously. it's being done synchronously.
+     * it currently only stores the citation, the description and the
+       related segments. other metadata can be added as needed.
+     * it does a screenscrape of the html. did i mention it's coupled to
+       the openvault implementation?
+    """
+
+    html_content = asset.html_source
+    # ^^ will error if there is more than one hit, i think?
+
+    if not html_content:
+        # i dunno. `url` might ought to just be a required source?
+        return
+
+    url = html_content.url
+    scheme, netloc = urlsplit(url)[:2]
+    base_href = "%s://%s" % (scheme, netloc)
+
+    if authenticate:
+        # set up authentication info
+        authinfo = urllib2.HTTPBasicAuthHandler()
+        authinfo.add_password(realm=auth_info['realm'],
+                              uri=base_href,
+                              user=auth_info['user'],
+                              passwd=auth_info['passwd'])
+
+        # build a new opener that adds authentication and install it
+        # NOTE(review): install_opener() replaces the process-wide default
+        # opener, so later urllib2.urlopen() calls elsewhere go through
+        # it too -- confirm that is intended.
+        opener = urllib2.build_opener(authinfo)
+        urllib2.install_opener(opener)
+
+    f = urllib2.urlopen(url)
+    try:
+        # checked explicitly rather than with `assert`, which is
+        # skipped when python runs with -O
+        if f.code != 200:
+            raise AssertionError(
+                "unexpected HTTP status %s for %s" % (f.code, url))
+        body = f.read()
+    finally:
+        # always release the connection, even if the check or read fails
+        f.close()
+
+    fragment = lxml.html.fromstring(body)
+    fragment.make_links_absolute(base_href)
+
+    metadatas = fragment.cssselect("div.metadata.primary>ul>li")
+    metadata_dict = {}
+
+    try:
+        metadata_dict['citation'] = _get_metadata_citation(fragment)
+    except IndexError:
+        # the page has no citation block; keep whatever else we find
+        pass
+
+    for metadata in metadatas:
+        try:
+            key = metadata.cssselect('h3')[0].text
+        except IndexError:
+            # an item without an <h3> heading cannot be classified
+            continue
+        # here's hopin' bvault is ready.  maria says yes
+        if key == "Description":
+            metadata_dict['description'] = \
+                _get_metadata_description(metadata)
+        elif key == "Related":
+            related = _get_metadata_related(metadata)
+            if related:
+                metadata_dict['segments in this record'] = related
+
+    # neither 'description' nor 'segments in this record' is guaranteed
+    # to be present at this point
+    metadata = simplejson.dumps(metadata_dict)
+    asset.metadata_blob = metadata
+    asset.save()
+
+    return metadata
+
+def _get_metadata_citation(html, format=None):
+    """
+    Return the HTML-escaped text of the citation block in `html`.
+
+    `format` picks the citation style by element id (`cite_<format>`);
+    it defaults to 'chicago'.
+
+    raises IndexError when `html` has no matching citation element.
+    """
+    if format is None:
+        format = 'chicago'
+
+    matches = html.cssselect("div.citation#cite_%s" % format)
+    citation = matches[0].text_content()
+
+    # '&' has to be escaped as well, and first, so the entities produced
+    # for '<' and '>' are not themselves re-escaped
+    citation = citation.replace('&', '&amp;')
+    return citation.replace('<', '&lt;').replace('>', '&gt;')
+
+# XXX TODO: just pass in the whole html fragment, dude
+def _get_metadata_related(metadata):
+    """
+    Return the related `div.hentry` entries under `metadata` serialized
+    to one html string with newlines removed, or None if there are none.
+    """
+    entries = metadata.cssselect("div.content ul>li>div.hentry")
+    if not entries:
+        return None
+
+    flattened = [tostring(entry).replace('\n', '') for entry in entries]
+    return ''.join(flattened)
+
+# XXX TODO: just pass in the whole html fragment, dude
+def _get_metadata_description(metadata):
+    """
+    Return the description text from a "Description" metadata item.
+
+    Looks through each `div.content>div` under `metadata` for one whose
+    first child is a <strong> with text ending in "Description:", and
+    returns that div's text content with the label's length cut off the
+    front.
+
+    raises AssertionError when no such div is found.
+    """
+    description = None
+    for candidate in metadata.cssselect("div.content>div"):
+        children = candidate.getchildren()
+        if not children:
+            # a div without child elements cannot carry the label
+            continue
+        label = children[0]
+        if label.tag.upper() != "STRONG":
+            continue
+        # label.text may be None (e.g. an empty <strong/>)
+        if not (label.text or '').endswith("Description:"):
+            continue
+        description = candidate.text_content()[len(label.text):]
+        break
+
+    # raised explicitly instead of via `assert` so it still fires under -O
+    if description is None:
+        raise AssertionError("no description found in metadata")
+    return description
+
-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+    ._*
+    *~
+    *.pyc
+    ve