do_ci

#!/usr/bin/env python3.1
# -*- coding: UTF-8 -*-

# Wed Aug 31, 02:46 sunuslee
# sunuslee (at) gmail(dot)com
# This is my first python program , with my first cgi page and first apache server
# You can do anything you want with those files under ONLY ONE condition:
# Please Do keep those lines above
import gzip
import io
import os
import pickle
import queue
import random
import sys
import threading
import time
import traceback
import urllib.parse
import urllib.request
#import httplib2

import cache
import dl
# When true, main() links styles.css from the generated page.
CSS = True
# Chooses which Apache installation root the cache files live under.
IS_LOCAL = True
if IS_LOCAL == True:
        ROOTDIR = "/home/sunus/apache/"
else:
        ROOTDIR = "/usr/local/apache2/"
# Prefix of every Douban profile link written to the page.
LINK_DB_PEOPLE = "http://www.douban.com/people/"
# Number of group members get_user() tries to select.
people_nr = 5
# Entries are [uid, nickname, icon]; main() puts the requesting user first.
people_list = []
# Entries are [user, item_dict], appended by get_user_collection().
table = []
# Category key -> label shown on the page.
cat_chs = {
        'movie': '电影',
        'music': '音乐',
        'book': '书籍',
}
# No effect at module scope; fh_write_html is actually bound inside main().
global fh_write_html
class Worker(threading.Thread):
        """Worker thread that consumes download jobs from a shared queue.

        Every queue item is unpacked into ``(uid, cat, start, que)`` and passed,
        together with this thread's name, to :meth:`process`.
        """

        def __init__(self, work_queue):
                super().__init__()
                self.work_queue = work_queue

        def run(self):
                """Handle jobs forever; the loop has no exit condition."""
                while True:
                        # Dequeue outside the try block so that task_done() is
                        # only ever called for an item that was really fetched.
                        job = self.work_queue.get()
                        try:
                                uid, cat, start, que = job
                                self.process(uid, cat, start, que, self.name)
                        except Exception:
                                # A failing job must not kill the worker: with
                                # every worker dead, Queue.join() in the caller
                                # would block forever on the remaining items.
                                traceback.print_exc()
                        finally:
                                self.work_queue.task_done()

        def process(self, uid, cat, start, que, tname):
                """Run one download job by delegating to ``dl.download_t``."""
                dl.download_t(uid, cat, start, que, tname)

def get_user_collection(user, cat):
        """Load the cached items of *user* for category *cat* into ``table``.

        The cache file name is built from ``user[0]`` and *cat*; the loaded
        dictionary is appended to the module-level ``table`` as
        ``[user, item_dict]``.
        """
        cache_dir = ROOTDIR + r'htdocs/cache_datas/'
        cache_name = 'cache_' + user[0] + '_' + cat
        table.append([user, cache.cache_load(cache_dir + cache_name)])

def _fetch_page(uri):
        """Download *uri* (requesting gzip) and return the body decoded as UTF-8."""
        req = urllib.request.Request(uri, headers={'Accept-encoding': 'gzip'})
        rec = urllib.request.urlopen(req)
        try:
                raw = rec.read()
        finally:
                rec.close()
        # The server may ignore Accept-encoding, so only gunzip a payload that
        # really starts with the gzip magic number.
        if raw[:2] == b'\x1f\x8b':
                raw = gzip.GzipFile(fileobj=io.BytesIO(raw)).read()
        return raw.decode('utf8')


def _last_jpg(line):
        """Return the last '"'-delimited piece of *line* ending in '.jpg', or None."""
        found = None
        for piece in line.split('"'):
                if piece.endswith('.jpg'):
                        found = piece
        return found


def get_user(group_uri, where, you):
        """Pick group members located in *where* and append them to ``people_list``.

        group_uri -- URL of the group; ``/member_search`` is appended to it.
        where     -- location text to search for (URL-quoted here).
        you       -- uid of the requesting user, which is never appended.

        Side effects: appends ``[uid, nickname, icon]`` entries to the
        module-level ``people_list`` (possibly rebinding it to a trimmed list)
        and writes the chosen random offset to ``fh_write_html``.
        """
        global people_list
        where = urllib.parse.quote(where)
        page_fmt = "{0}/member_search?search_text={1}&start={2}"

        # First request: ask for an absurdly large offset. A line mentioning
        # offset + 1 is then parsed for the number following '-', which is
        # presumably the total number of matches -- TODO confirm against the
        # live page. No such line leaves total at 0 (single-page result).
        probe_start = 13577293
        total = 0
        probe_page = _fetch_page(page_fmt.format(group_uri, where, probe_start))
        for line in probe_page.splitlines():
                if str(probe_start + 1) in line:
                        try:
                                total = int(line.split('-')[1].split()[0])
                        except (IndexError, ValueError):
                                # Mentions the number but is not the range line.
                                continue

        # Clamp the upper bound: with fewer matches than people_nr the
        # original randint(0, negative) raised ValueError.
        start = 0 if total == 0 else random.randint(0, max(0, total - people_nr))
        contents = _fetch_page(page_fmt.format(group_uri, where, start))

#       line is like this pattern:
#        <dd><a href="http://www.douban.com/people/__UID__/">__NICKNAME__</a><br/><span class="pl">(local)</span></dd></dl><dl class="obu" ><dt><a href="http://www.douban.com/people/__UID__/" class="nbg"><img src="http://img3.douban.com/icon/sdfs.jpg" class="m_sub_img" alt="__NICKNAME__"/></a></dt>
        # Each member line carries the icon of the NEXT member, so the icon is
        # remembered one line ahead. Start from None so that a page without the
        # leading 'obss' icon no longer raises UnboundLocalError.
        cur_user_icon = None
        for line in contents.splitlines():
                if '''<div class='obss'><dl class="obu" >''' in line:
                        first_icon = _last_jpg(line)
                        if first_icon is not None:
                                cur_user_icon = first_icon
                if '<dd><a href="http://www.douban.com/people/' in line:
                        # people_list[0] is the requesting user, hence the + 1.
                        if total != 0 and len(people_list) >= people_nr + 1:
                                break
                        next_user_icon = _last_jpg(line)
                        stripped = line.strip()
                        anchor = stripped[0:stripped.find(r'</a>')]
                        head, _, nickname = anchor.rpartition(r'">')
                        # head is '<dd><a href="http://.../people/UID/': drop the
                        # 13-char tag prefix and the trailing slash, keep UID.
                        uid = head[13:-1].split('/')[-1]
                        if uid != you:
                                people_list.append([uid, nickname, cur_user_icon])
                        if next_user_icon is not None:
                                cur_user_icon = next_user_icon

        # total == 0 means everything fitted on the first page, so every member
        # was collected above; now cut a random window of people_nr members.
        if total == 0:
                others = len(people_list) - 1   # exclude the requesting user
                if others > people_nr:
                        # Upper bound is others - people_nr so the window always
                        # holds people_nr members (the original bound was one
                        # too large and could drop a member).
                        start = random.randint(0, others - people_nr)
                        people_list = (people_list[0:1]
                                       + people_list[start + 1:start + 1 + people_nr])
        fh_write_html.write("<h4>你的幸运数字是 *{0}*</h4>\n".format(start))


# Directory (as seen by the browser) holding the star and info images.
IMG_PATH = "/imgs/"


def _escape_attr(value):
        """Return *value* as text that is safe inside a double-quoted HTML attribute.

        Only '"' is replaced: the text may already contain HTML entities, so
        escaping '&' as well could double-escape it.
        """
        return str(value).replace('"', '&quot;')


def print_stars(n, maxstar, summary='', item_url=''):
        """Return the HTML for *n* lit stars followed by ``maxstar - n`` dark stars.

        n        -- rating; -1 (or any negative value) means "no rating" and is
                    shown as 0, values above *maxstar* are shown as *maxstar*.
        maxstar  -- total number of stars to draw.
        summary  -- tooltip text for the link; the string '-1' means "none".
        item_url -- target of the link wrapping the stars.

        The stars are followed by info.png when there is a summary and by
        info1.png otherwise.
        """
        # Clamp so an out-of-range rating can never draw more (or fewer) than
        # maxstar images in total.
        lit = min(max(n, 0), maxstar)
        dark = maxstar - lit
        if summary == '-1':
                summary = ''
        left = '<a href="{0}" title="{1}">'.format(_escape_attr(item_url),
                                                   _escape_attr(summary))
        s1 = '<img src="{0}sg.png" />'.format(IMG_PATH)
        s2 = '<img src="{0}sw.png" />'.format(IMG_PATH)
        html_code = left + s1 * lit + s2 * dark
        if summary != '':
                html_code += '<img src="{0}info.png" /></a>'.format(IMG_PATH)
        else:
                html_code += '<img src="{0}info1.png" /></a>'.format(IMG_PATH)
        return html_code

def get_match_rate(uid1_idx, uid2_idx, cat):
        """Write the items two users share and return how many there are.

        uid1_idx, uid2_idx -- positions of the two users in ``table``.
        cat                -- category key; must be a key of ``cat_chs``.

        A heading with the number of common items is written to
        ``fh_write_html``, followed (when there is at least one) by a table
        with one row per common item.

        Returns ``[uid2_idx, common_count]``, or None after writing a
        diagnostic line when looking the users up raises IndexError.
        """
        cat_local = cat_chs[cat]
        try:
                first_keys = set(table[uid1_idx][1].keys())
                second_keys = set(table[uid2_idx][1].keys())
        except IndexError:
                fh_write_html.write("<h4>Got the problem, uid1_idx = {0} uid2_idx = {1}</h4>\n".format(uid1_idx, uid2_idx))
                return None
        shared = first_keys & second_keys
        shared_count = len(shared)

        first_user, first_items = table[uid1_idx][0], table[uid1_idx][1]
        second_user, second_items = table[uid2_idx][0], table[uid2_idx][1]
        uid1, nickname1, icon1 = first_user[0], first_user[1], first_user[2]
        uid2, nickname2, icon2 = second_user[0], second_user[1], second_user[2]

        # The whitespace inside the continued literals below is part of the
        # emitted HTML, so those lines are kept exactly as they were.
        fh_write_html.write('<h4><a href="{0}{1}">{2}</a>\
              和\
              <a href="{0}{3}">{4}</a>\
              都喜欢的{5}有({6}):</h4>\n'.format(LINK_DB_PEOPLE, uid1, nickname1,
                                              uid2, nickname2,
                                              cat_local, shared_count))
        if shared:
                fh_write_html.write('<table border="1" width="600px">\n')
                fh_write_html.write('<tr>\
                <th align="left" width="300px"><h4>{1}</h4></th>\
                <th align="left" width="300px"><h4><a href="{0}{2}"><img src="{4}" /><br>{3}</a></h4></th>\
                <th align="left" width="300px"><h4><a href="{0}{5}"><img src="{7}" /><br>{6}</a></h4></th></tr>\n'.format(
                        LINK_DB_PEOPLE, "match",
                        uid1, nickname1, icon1,
                        uid2, nickname2, icon2))
                for item_name in shared:
                        first_rec = first_items[item_name]
                        second_rec = second_items[item_name]
                        # Field 2 holds the display name, '-1' when missing.
                        if first_rec[2] != '-1':
                                name = first_rec[2]
                        else:
                                name = item_name
                        item_url = first_rec[1]
                        first_stars = print_stars(int(first_rec[3]), 5, first_rec[4], item_url)
                        second_stars = print_stars(int(second_rec[3]), 5, second_rec[4], item_url)
                        fh_write_html.write('<tr>\
                        <td width="300px"><h4><a href="{5}" target="_blank">{0}</a></h4></td>\
                        <td width="300px"><h4>{1}<br>{2}</h4></td>\
                        <td width="300px"><h4>{3}<br>{4}</h4></td></tr>\n'.format(
                                name,
                                first_rec[0], first_stars,
                                second_rec[0], second_stars,
                                item_url))
                fh_write_html.write("</table>\n")
        return [uid2_idx, shared_count]

def get_user_icon(uid):
        """Return the icon URL found on the Douban profile page of *uid*.

        The page is scanned for the first line mentioning 'douban.com/icon'.
        Returns None when no such line yields a value.
        """
        fh = urllib.request.urlopen('http://www.douban.com/people/' + uid)
        try:
                cont = fh.read().decode('utf8')
        finally:
                # Always release the connection, even if read/decode fails.
                fh.close()
        marker = 'douban.com/icon'
        for line in cont.splitlines():
                if marker not in line:
                        continue
                parts = line.split('"')
                # Odd positions are the pieces between quote pairs; prefer the
                # one that actually contains the icon URL.
                for quoted in parts[1::2]:
                        if marker in quoted:
                                return quoted
                # Fall back to the first quoted value, as the original did,
                # but skip lines that contain no quotes at all.
                if len(parts) > 1:
                        return parts[1]
        return None


def _write_page_footer(short_url):
        """Write the share-link line and the closing body/html tags."""
        fh_write_html.write("<h4>转发本页地址:{0}</h4>\n".format(short_url))
        fh_write_html.write("</body>\n")
        fh_write_html.write("</html>\n")


def _render_result_page(you, your_nickname, group_url, group_name, location, cat, short_url):
        """Write the whole result page to ``fh_write_html``.

        Returns True when the full ranking was written and False when the page
        ended early (no members found, or ``dl.EXIT`` was set by a download).
        """
        fh_write_html.write("<html>\n")
        fh_write_html.write("<head>\n")
        fh_write_html.write('<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>\n')
        fh_write_html.write("<title>Result</title>\n")
        if CSS == True:
                fh_write_html.write('<link href="styles.css" rel="stylesheet" type="text/css" />\n')
        fh_write_html.write('<!--[if gte IE 6.]>\n')
        fh_write_html.write('<script defer type="text/javascript" src="pngfix.js"></script>\n')
        fh_write_html.write('<![endif]-->\n')
        fh_write_html.write("</head>\n")
        fh_write_html.write("<body>\n")

        # The requesting user always sits at index 0 of people_list.
        you_icon = get_user_icon(you)
        people_list.append([you, your_nickname, you_icon])
        get_user(group_url, location, you)
        if len(people_list) == 1:
                fh_write_html.write("<h4>该小组好似没有来自 {0} 的豆友噢:(</h4>\n".format(location))
                _write_page_footer(short_url)
                return False

        # Thread Part
        dl_queue = queue.Queue()
        for _ in range(5):
                worker = Worker(dl_queue)
                worker.daemon = True
                worker.start()

        cache_dir = ROOTDIR + r'htdocs/cache_datas/'
        for people in people_list:
                file_path = cache_dir + 'cache_' + people[0] + '_' + cat
                if os.access(file_path, os.R_OK):
                        # Mark nicknames whose data came from the cache.
                        people[1] += '(c)'
                else:
                        dl_queue.put([people[0], cat, 1, dl_queue])
                        dl_queue.join()
                        # dl.EXIT's type is not visible here, so the original
                        # comparison is kept as-is.
                        if dl.EXIT == True:
                                fh_write_html.write("<h4> DL.exit =" + str(dl.EXIT) + "</h4>\n")
                                _write_page_footer(short_url)
                                return False
                        cache.cache_save(cache_dir + people[0] + '_' + cat + '_1')
                get_user_collection(people, cat)

        # table[0] is the requesting user; the others start at index 1.
        people_list.pop(0)
        # YOU DO NOT BELONG TO PEOPLE_LIST ANYMORE!!
        total_people = len(people_list)
        rank = []
        for i in range(1, total_people + 1):
                rank_item = get_match_rate(0, i, cat)
                if rank_item is not None:
                        rank.append(rank_item)
        rank.sort(key=lambda node: node[1], reverse=True)

        fh_write_html.write('<h4>在 <a href="{0}">{1}</a> 为您找到了在 {2} 的豆友:</h4>\n'.format(group_url, group_name, location))
        fh_write_html.write('<!-- saying:glc\t{0}\t{1}\t{2}\t -->\n'.format(group_name, location, cat_chs[cat]))
        # Walk the ranking itself: it can be shorter than total_people when
        # get_match_rate() returned None, which used to raise IndexError.
        for r, (table_idx, common) in enumerate(rank, 1):
                user = table[table_idx][0]
                uid = user[0]       # use to @somebody
                nickname = user[1]  # use to show on html page
                icon = user[2]      # use to show user icon
                fh_write_html.write('<h4>#{0:<4} <a href="{3}{4}"><img src="{5}" />{1}</a>和你有 {2} 项共同喜好</h4>\n'.format(r, nickname, common,
                                                                                                     LINK_DB_PEOPLE, uid, icon))
                fh_write_html.write('<!-- saying:rank\t{0}\t{1}\t{2}\t -->\n'.format(r, uid, common))
        _write_page_footer(short_url)
        return True


def main():
        """Read one job from ./wait_queue and generate its HTML result page.

        The first line of ./wait_queue is tab-separated: 0 uid, 1 nickname,
        2 group URL, 3 group name, 4 location, 5 category, 7 short URL,
        8 output HTML path (field 6 is not used here).

        Returns 0 when the page ended early and None after a full run; a
        summary line is printed to stdout in both cases.
        """
        global people_list
        global fh_write_html
        with open('./wait_queue', encoding='utf8') as fa:
                # Strip only the newline: slicing off the last character would
                # truncate the output path when the line has no trailing '\n'.
                args = fa.readline().rstrip('\n').split('\t')
        you = args[0]
        your_nickname = args[1]
        group_url = args[2]
        group_name = args[3]
        if not group_url.endswith('/'):
                group_url += '/'
        location = args[4]
        cat = args[5]
        short_url = args[7]
        html_page_path = args[8]

        fh_write_html = open(html_page_path, "w", encoding="utf8")
        try:
                finished = _render_result_page(you, your_nickname, group_url, group_name,
                                               location, cat, short_url)
        finally:
                # Close on every path (including errors) so the page is fully
                # on disk before it is announced on stdout.
                fh_write_html.close()

        if not finished:
                print('New page:', short_url)
                return 0
        print('Result: {0}\t{1}\t{2}\t{3}'.format(cat, you, group_url, short_url))
        return

# Runs unconditionally: there is no __main__ guard, so importing this file
# also triggers a full page generation.
main()