-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtestHBase.py
42 lines (41 loc) · 11.1 KB
/
testHBase.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# http://happybase.readthedocs.org/en/latest/user.html
import happybase,sys
import numpy as np
# Connect to the HBase Thrift gateway as soon as the module is loaded.
connection = happybase.Connection(host='10.1.94.57')
# NOTE: enumerating the tables with ``connection.tables()`` was found to fail
# against this server, so the table handle is requested directly by name.
tab = connection.table(name='aaron_memex_ht-images')
# Script entry point: decide which image ids will be looked up in the table.
if __name__ == '__main__':
# An id passed as the first command-line argument is parsed as an int and
# becomes the only id in the list.
if len(sys.argv)>1:
my_ids_int= [int(sys.argv[1])]
else:
# Hard-coded sample ids. Each assignment below rebinds my_ids_int, so when
# they run one after another only the last one executed is actually queried.
# NOTE(review): indentation is not visible in this view; presumably all of
# these defaults belong to the else branch -- confirm against the real file.
my_ids_int = [8496680,100000000]
# Five random integer ids drawn from [0, 107415056).
my_ids_int = list(np.random.randint(0,107415056,5))
my_ids_int = [100034125,100000118,1000,8496680]
my_ids_int = [56545928]
my_ids_int = [17815649, 39711480, 40083023, 40324020, 42712983, 42753871, 43195171, 43324889, 43349265, 43495951, 43617738, 44049211, 44058657, 44890080, 45739339, 45783932, 45922920, 46412393, 47090664, 47264695, 47398734, 47448606, 48168621, 48686630, 54681420, 55893391, 56026276, 56634619, 56714538, 56754437, 56819378, 56960450, 57482122, 57494671, 57532230]
my_ids_int = [ 17815649, 44860427, 54510124, 43324844, 19306958, 107134894, 6560421, 51209727, 53820099, 1273968, 39548431, 73528854, 72231626, 19197978, 6518422, 3080617, 6311804, 3092522, 1256004, 141451, 39815606, 17847445, 6778472, 6480475, 2998889, 6872944, 6972631, 4925546, 1980746, 6289776, 25653902, 2385631, 90594, 1067184, 6876171, 70622683, 4509317, 8263769, 2997820, 131116, 4351033, 20541, 6870904, 20342837, 4324537, 3092401, 729843, 3049784, 6614643, 73246595, 49704893, 4276916, 4560355, 6881804, 51381018, 44924209, 980421, 19180453, 6936669, 2997801, 8264139, 4717308, 50169838, 6461760, 52702040, 3538684, 56298532, 100204636, 63648925, 50165073, 73095325, 72184223, 7886239, 202657, 20342860, 278801, 20605652, 73571909, 2997875, 6420057, 20461340, 1106663, 85776819, 82321316, 3616390, 3693459, 632387, 3147918, 86003889, 106789780, 3080861, 8057891, 391525, 999266, 20965044, 81123427, 4649602, 4162448, 4162436, 59962438, 58935934, 6908028, 59207998, 4668718, 72109014, 6929884, 60102343, 1051739, 3125941, 72210824, 64106789, 72525381, 50448472, 383186, 5447682, 20773, 752910, 2498443, 4881336, 106684926, 1250, 65613968, 3690415, 72659821, 72172600, 69296031, 3133324, 6401813, 7623302, 3125436, 6285602, 2977466, 3085121, 6582289, 49847714, 4261881, 8210736, 5309394, 73414954, 107096272, 50064492, 2232083, 55730792, 8388658, 8195558, 44392353, 57164848, 40059603, 56337259, 20299593, 251340, 67214479, 92778811, 62402441, 80567686, 2705870, 77623063, 7877618, 83634920, 82054319, 7868780, 3527502, 7877925, 105681016, 104347218, 59975043, 1406162, 2164483, 20463646, 8130664, 3117382, 8117884, 6790587, 3017446, 8128562, 3665153, 72501353, 66355002, 64533779, 72374896, 42962701, 107390357, 98690841, 7615344, 3495586, 20806198, 3673813, 8025455, 4277095, 73126627, 77906809, 604674, 2630106, 20196712, 925002, 5432559, 660106, 93174593, 68987, 3903603, 39714766, 3446959, 6294961, 92321958, 76041021, 73378454, 8317088, 6369270, 4072290, 2947296, 3838232, 6473471, 
54300455, 3752369, 19568040, 19657435, 72142150, 585294, 21017248, 84210867, 2096082, 71261236, 44498502, 4917631, 72190832, 70268267, 1805744, 61978726, 60470436, 2413429, 20799672, 20423506, 6328651, 3897680, 75386530, 74095766, 3798734, 7459064, 19939583, 2112177, 7610939, 3527949, 49913381, 3792460, 47993, 672939, 861974, 106700089, 75484206, 83486045, 4234482, 255012, 20936631, 594025, 2949713, 2704571, 1994993, 55766, 1443287, 2630858, 53298014, 75144739, 5131179, 19292441, 5087008, 56201573, 8366738, 2635503, 211070, 2340167, 20516406, 85188796, 150653, 616117, 96512293, 49575723, 65246923, 4438188, 4434165, 20416277, 4403465, 835307, 1266239, 2106051, 4943229, 6784973, 20544312, 32755, 6705854, 7828878, 2770854, 6268605, 6463191, 65644454, 6837344, 8090715, 3066314, 4604039, 50046786, 20388017, 4297444, 7459587, 2211486, 49616, 6283269, 2771100, 72810659, 52627885, 20877132, 1060478, 51508837, 64277744, 7775769, 3302810, 68151172, 65109164, 5159672, 20415916, 68088945, 3609257, 6585973, 4288720, 73026217, 1406788, 52791968, 5696338, 5812139, 5537662, 20654142, 6978463, 352012, 7707727, 3841299, 3058828, 7558878, 51555886, 52355570, 8225661, 49427490, 10565, 20833884, 5687335, 7539549, 20831101, 5259575, 1344299, 52140095, 6255113, 3872675, 20797817, 4132854, 6383987, 3032817, 6831424, 44269827, 2303384, 6264167, 55726619, 54165908, 49939209, 49938247, 7912987, 3114722, 8004176, 61770062, 54232080, 52424601, 432595, 20571257, 6745372, 6470531, 3475793, 93513494, 104498266, 7118333, 39426055, 3880708, 7041453, 353736, 94375194, 20163717, 2411620, 51205481, 41223016, 20017436, 3879480, 20060975, 79327228, 61782211, 6263672, 2234858, 91715172, 21057573, 3566268, 6270828, 8245693, 642116, 74412417, 4410445, 86967535, 6430936, 7951175, 6384507, 45389539, 6599055, 63514240, 388473, 39564769, 20824722, 6382025, 3889738, 87864011, 21214391, 217566, 20472997, 58120976, 6749676, 3890149, 6247887, 323609, 21036212, 750602, 58506677, 4277646, 6885591, 20769708, 6251980, 
3984133, 3684979, 5443704, 991530, 76064, 4816187, 3758680, 1548679, 2894884, 7821257, 4012623, 7573787, 3463767, 3984134, 7086047, 652269, 547453, 5097590, 2249419, 39332116, 997967, 1580891, 851146, 5022266, 81089172, 563236, 20301326, 4813680, 3738289, 6544580, 6727213, 307541, 17210, 1056420, 4805366, 6344806, 3486704, 3470398, 6288207, 3244507, 3189933, 6260789, 6434590, 3190287, 6664986, 51343048, 3811401, 5802006, 159783, 3803335, 3887860, 6248501, 4058667, 3456931, 107427198, 186005, 39649984, 7457907, 58570815, 97261998, 8361855, 4261544, 7980314, 62437824, 7714941, 61608, 74627304, 738965, 19866720, 8090523, 3072103, 401804, 50109884, 66335059, 50783509, 6782384, 1677649, 1484626, 6533587, 1426019, 475348, 39344100, 54648974, 94260466, 1849077, 1748369, 19614584, 39870876, 1345717, 51006833, 3515910, 39779696, 2923676, 39803783, 2894878, 8030857, 2896904, 7368488, 8822096, 52216538, 20377047, 20466492, 50600140, 391103, 2314427, 52231003, 53422813, 403145, 6439696, 2815376, 39749907, 6339959, 4114539, 8380692, 67488420, 6767227, 6661842, 107409253, 39824271, 5422638, 72918733, 56701481, 56743246, 56339796, 45091462, 3805127, 93490229, 3805194, 3359567, 7883654, 4292080, 4990530, 44312461, 1901189, 8090685, 3092430, 20894911, 657789, 43774549, 20765209, 2856259, 6382355, 49164999, 51961041, 6690166, 4120230, 6956466, 7572801, 2873149, 53029212, 21108670, 50000811, 6954189, 2629744, 49967079, 54629118, 53958016, 55819454, 43095496, 4731122, 5609037, 87614147, 3078706, 8048379, 6400360, 20002694, 19474848, 166620, 93530998, 1015224, 56584898, 4231950, 101108281, 216888, 44048731, 140813, 1426350, 1152602, 6680660, 42738230, 7975074, 1098743, 51872026, 62361832, 8046501, 54123862, 8111667, 5070212, 4170536, 3125072, 6480342, 8342341, 6962089, 19886701, 4130002, 8307197, 97919459, 4121255, 6263917, 3633320, 8090776, 3067604, 3172924, 8212405, 6260240, 4125940, 6787070, 19532164, 3562171, 2937164, 43016548, 984395, 5208316, 59427546, 3750194, 3631166, 6844429, 
54867260, 96580410, 45163160, 62794453, 50718378, 91895062, 44512271, 44931443, 40242547, 93769167, 50378739, 4688978, 7421042, 50601715, 53972925, 3122045, 20184417, 3424843, 19611721, 6729518, 80537302, 56631098, 2198047, 924983, 1232101, 1130509, 20476709, 71728278, 43620156, 2746559, 73090949, 673772, 7891282, 6444218, 3829413, 3824120, 3814323, 7004958, 3821361, 7004225, 3819408, 20476674, 72182524, 44057446, 96706237, 8206265, 120148, 5700485, 50028363, 50135339, 39724108, 8206489, 20773114, 73232486, 786804, 6577717, 7187057, 3509146, 55251824, 53357303, 784501, 1334754, 44967241, 1075327, 107886389, 40040653, 19181916, 44372293, 39957183, 8816918, 8001636, 363053, 20133703, 76165776, 40064713, 2808662, 2874637, 2885085, 6528564, 49316618, 57014590, 20899652, 2997694, 6528934, 7459970, 4048380, 6897435, 53177223, 17844967, 4276529, 3070477, 8089296, 6985682, 2820946, 52082440, 1209934, 80670725, 5843792, 52982027, 2257415, 39683109, 29727608, 3799707, 39900023, 50940104, 8205957, 54189846, 7730953, 70336625, 3664352, 6408885, 4072889, 25687, 42787168, 7927419, 3689004, 101127886, 4188183, 19180421, 43092171, 7422301, 6859846, 19427923, 61373130, 7241303, 2791486, 2986998, 50215903, 824130, 4271954, 8367508, 56767118, 51145072, 54584601, 50244397, 3775937, 39836990, 3112045, 7925813, 53619923, 50123097, 4972011, 107102576, 5551216, 3680494, 50769749, 19857178, 4108237, 5834937, 5808544, 20409640, 54177979, 1004703, 20354369, 506379, 49960835, 49295946, 19524131, 20636674, 59551082, 1359114, 52191105, 7333791, 1687103, 52252076, 50559356, 105002273, 59370872, 58735326, 627987, 56681130, 52788868, 3890742, 6443675, 5310931, 4238204, 7483574, 2268329, 55889172, 3570442, 7494502, 53898068, 2788467, 2895186, 245936, 5062632, 56637867, 3794367, 1624822, 72542478, 4594221, 55865351, 50054894, 50256772, 53642514, 39344881, 4834698, 8006145, 3794539, 3751117, 6418629, 61502081, 56428046, 99935987, 1719245, 62004989, 51187713, 39952522, 3215677, 19236502, 3430243, 
6890429, 4395630, 56666274, 4156387, 83873777, 5808340, 6655095, 107364398, 39882351, 300706, 670196, 55156971, 67670416, 19318225, 2921172, 6640035, 4434053, 915790, 49378994, 75418662, 96744536, 8336750, 2791702, 56324055, 20481530, 100916264, 65793427, 662380, 52888740, 2171157, 4749223, 6338189, 4231914, 8257654, 103662456, 6854110, 4681738, 6382826, 52918393, 39712580, 81330404, 49415374, 96717198, 49795446, 49788201, 5487609, 49238716, 19988932, 7463724, 2759027, 61780355, 20273470, 2836590, 51391521, 8685748, 1850245, 53093552, 51666431, 2556455, 60666441, 61521230, 6531753, 20766066, 1240577, 93422536, 97570891, 66816005, 62574924, 8407150, 61970103, 4356268, 38014, 6649828, 4015556, 121583, 52395642, 5391403, 53361558, 554102, 19932902, 1322416, 55174846, 53049583, 2225522, 4536226, 103943065, 6247599, 1643336, 8046790, 42708477, 4138143, 4995540, 79931226, 2802659, 8046813, 20082260, 49458491, 19308483, 19534328, 2329371, 501580, 1373424, 44420758, 7977912, 53974200, 51091481, 7877206, 20496891, 57738445, 287492, 3063509, 39504231, 19757799, 44993588, 20666572, 1287285, 62437843, 78722431, 3921822, 52837491, 20673695, 1371286, 53197842, 51092114, 50247066, 4027933, 62902254, 62074154, 53163911, 75911879, 2604712, 56844959, 20775745, 72616866, 6750257, 44029184, 8208640, 658738, 51754763, 8818039, 32945798, 65004077, 62261207, 45216000, 5485393 ]
def count_near_dups(near_dups_field):
    """Return how many ids a comma-separated near-duplicate field holds.

    Blank entries are skipped, so an empty field yields 0 instead of the 1
    that ``len("".split(","))`` would report.
    """
    return sum(1 for one_id in near_dups_field.split(",") if one_id.strip())


def report_row(row_key, row_data):
    """Print what is known about one row and return its similar-image count.

    row_key  -- the row id, as returned alongside the row data.
    row_data -- dict mapping 'family:qualifier' column names to values.

    Returns the number of near duplicates, or None when the row has no
    'meta:columbia_near_dups' column.
    """
    nb_sim_imgs = None
    try:
        nb_sim_imgs = count_near_dups(row_data['meta:columbia_near_dups'])
        # Each message is formatted into a single string so the call prints
        # the same text whether ``print`` is a statement or a function.
        print("%s has %d similar images" % (row_key, nb_sim_imgs))
        print("%s biggest_dbid: %s"
              % (row_key, row_data['meta:columbia_near_dups_biggest_dbid']))
        print("%s Image URL: %s" % (row_key, row_data['meta:location']))
    except KeyError:
        # Only a missing column is an expected condition; any other error
        # (lost connection, Ctrl-C, ...) is allowed to propagate.
        if 'image:orig' in row_data:
            print("%s not yet indexed. Call the Colubmia Search API if you really need the similar images." % (row_key,))
        else:
            print("%s may have failed to be downloaded and could be no longer available, hence we have no hash codes or features." % (row_key,))
    return nb_sim_imgs


def report_rows(all_rows):
    """Report every (row_key, row_data) pair and return the smallest count.

    Returns None when no row carries a near-duplicate column (including when
    all_rows is empty).
    """
    min_sim = None
    for row_key, row_data in all_rows:
        nb_sim_imgs = report_row(row_key, row_data)
        if nb_sim_imgs is None:
            continue
        # Compare against None explicitly: 0 is a legitimate minimum.
        min_sim = nb_sim_imgs if min_sim is None else min(min_sim, nb_sim_imgs)
    return min_sim


if __name__ == '__main__':
    # HBase row keys are strings, so the integer ids are stringified first.
    my_ids_str = [str(one_id) for one_id in my_ids_int]
    print(my_ids_str)
    # Fetch all requested rows in one call, report them, then print the
    # smallest near-duplicate count seen (None if there was none).
    print(report_rows(tab.rows(my_ids_str)))