From 96add96a8f1875c0864a8f4f8d7934d5541d3b2d Mon Sep 17 00:00:00 2001 From: Ashwin Nair Date: Sun, 19 Apr 2020 23:17:14 +0400 Subject: [PATCH] Remove converter functionality. Post v0.3.0, converter functionality will not be supported. This was motivated by two reasons: 1. Only TFRecord was supported till now. Tensorflow is a large library and it does not make sense for it to be a project requirement when it's only used for a singular function. 2. There are too many conversion formats. Trying to include even the most popular ones makes the project unwieldy. Better to use one of the readily available scripts online. --- .github/coco.png | Bin 7364 -> 0 bytes README.md | 17 ++-- coco_assistant/VERSION | 2 +- coco_assistant/coco_assistant.py | 23 +---- coco_assistant/coco_converters.py | 138 ------------------------------ requirements.txt | 1 - 6 files changed, 9 insertions(+), 172 deletions(-) delete mode 100644 .github/coco.png delete mode 100755 coco_assistant/coco_converters.py diff --git a/.github/coco.png b/.github/coco.png deleted file mode 100644 index 93a36fd822ca9b006b2435ed45d82d9bf5b97b4f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7364 zcmV;#96RHQP)*x3{;vgfO3F4Dest*5DN%yO&$PQFVQ$J9taA~ zs(M~R8bh*r0EI+AKtq6lf&h)IOB53TUUV=pGXNtl03j62k2IAiBix!eU|?c!VNL*c zkd0#{I58<777G9vAZcl9zUtHfHy@pDDTX8lhIRGpf zcz%seHx}RZ>P)+XFCh;AO({@NQUDeYkKMM>P)kq9j2<2$Yp#uT&Z0b>xOt|K@5I58 zvZ7e9h5FgouY-Wv#H82X@=oR@}0LzO-` zOO>tStEi#Qx2V3eg}kBBySmhTcOW({C}2jtu9>NpeY%UNnE(J9x=BPqRCt{2or`-C zS+ciBgQCEXBC~9FcUQaW6+VA%xGd+s9)d3u1%q}DHSo^1x%<@*@u$4FUH!Ipg%#q99nogJ6|crg+VW@#}*#u;qMaQCQ-e@3a7J*C2$rS zE_)RhY#H>u(!S2;V7P=TC=2js9uQ|2@@ggQ9h1O%h5S0BaX1x0&_X8MJkmy2h8))R z@Xp0zrBa?N)(|Bx!6ENR_so(P0qryUPsELu(1GY zQ!75~Y)VWZ^;$%DQ&=sycvnN(sG#^=vfVjkZ40f{F&ArqRa69y6+o5CWSeRUX{=7z zkP0Q)Nq%6KE;Jtwg9n`UW8e`m{K}jR~5x zC5y}d3bZ=iL^-%lal54Zugqmh*t&X8ln}^F+|NTZ2>OppQ@tzeBQ0X zf&$JDJ>UZUC*RU>hyyHOF3)MYu8IeNb4iI}uXV)QW$D zH;=NMlmw-;vQqUeYOF-jG1@Alut4JNwA*Eu-|r7?Zmd?yS`z-JFx55~$a0xp%Fz`& zd?>-St>*bhS`s^BqZ{NBd)NwMOuP{`6b^e)d_LdUaxtlBnkFe*WvT9@MK`@uiEXS9 zqN-FFt0G8}(s+4H$Rwi8N2Ytkb%>bChQdUOGByT2H?RrFilUaa+7l<2sq3vOr3%Ak z7o>poNcERirKQVg$smbr0dm^A zYeixSJxyfR5USe4=2m%4LxCn1YMWSLX02)TF8@fy=(Y_lNXcclKP>G(craeOEXyv| z-71ATA1s-D12_YEkN#V=lscQiJ)j?I$)xdBb)z-04tO0yqUdx2^h!x-I4|kc^mdZ2 zpca??AvHpSzgW}1TY!MWeW58UQ%)5?LR@TnW+tk{KQw5OPfeDK#Wi|I zNosV3LcWEmgu|TaDuptLR%JBn%g`YOWJTZ9_r|nBTZ+enC@+e{4jRTzsm!)2udKqV z`Hj%(D&j6JzbH2vhg3FmD@hiXCEbHaYl5;Ae7{s&T5^K~gG+c-5JMz9)Ot(QJ8Qre zDwTpssal!NRwG6Jx`ebvGzPQT0@qp+--2#drmW5#1m`TRsZrpKwjg_O7&aR#Fl{Q@ zTDiyBA2?5J-|8jRQhq>ZPLkPz7>Su$ixA|v zf{|6!?NB39`30u>OR7*VD8T!0pS)}wz|RPNM-`N{q$!82m9rn#WyXJeTbshN!+J6$+48?S?=Gs_!V1Ev`}A`35`LlT52`w^ zUTsZ6DJ^57mn;|ZLg{0jFVWILH5iu#g9NLBM_|by1pS7_K6R5`?tg`}>RJ8Wv(-IJ zUr~~#jWVQqW97|?^0DeQXq6Y}&&gpaNeoJWDQOTVmfnNIU!s2Zqd*?uzKza(^E-OE z#C2=qrh!SEh6#qACv#^=QlJbqv>~j2oO+QK(=QdnkDE@QQX-|5i*y0H#N%b@(os^} zS=(9SJ4FNxlXR^>6qYm?@GpTER3K$wid7*igFZp_R>1@{oFAK7=3kdYf^G4UPN?bN@YzCj2nL7^}VYXXk$3!E&^7dh$gIg0-mY_0%K@uFlI|8X=I^2Ahxd% z_tH(eHsMFo@+sSos-Pad^cA~g1uRM`;slV&m0%=F>W0hZlI?phzTh0Qzo1G>pu@4U zRPjn>8lnX_rq>JY7OE5!g%U5YO6{x->{q&Z^VF%cw)_125ANX=V2?X`$yW7dFyzp{ zD@r;rjiOMZE*Q4h&DDY3(pFrdW#t6``n+44Fi`XP0>p}wa?oUcm#d788&C6^fo-o3iRF3dSUPNAZnQI&zI(I+o4(nbjLzdZ?%=vI) z-i~8n9JcGMa4;AO>9EnVPtVWq1^MOXQfcu}`smrLam>&IQ$JgBlcSZ2s-i(^USxwh zO)3H}9(H{%17e&NqZvE|lo4=9#6dfn0I}kfZ&Eqh2l^}i-% z2k)8eR&%rQ99XOZPP@obp*1bCEGSf1-U}|4qnR!_q16GzK+UUR4#Zp*DQ}(4dV(STs`El!Kw&V8WIN0W~v=GQ5Z$%o#o7I#4_0k0E2`d{OA^gAq8$_|k4(kQd*)_B zhe8RPehu?!cTtlh7@9IqTZFs=m90j#RWDY)JY>yVz?+1CvHRXuYpHc2GSv!2EcxJp zH6^*s6Up?VT-`A=GnnOirtjI@ScpSyvdlb*h#DHNqJ$tIK>#IF*#zD~Y%k8NnZ@cK zEhr0&mL?h{RVf>}WW=4#7`*cQ;NaCz^^5cKxKHH0qH$cwhc=*M5l*T233#jsgQ_); zXE|AjmiC(EG^Jf0zvxq=mg>9(OvG$)?lU z#jArPSD7`-sZ7v=dFPlXQY@=12uC5w3t)Ex+zQqcXsA}vmN8M;jJ2R_Y*~*L*k}lp zBotZxTg`%yHyF=>{65I#KLDT3WYX@)7+ex)FaTEyBUAJze&S8yq>F-SNeJr*-p!)d zha)!ozOBJxU7+&Qa-=9NFBf_G%d#}gEBwTZLrnvBFpd&*M*?1!h7#ZlvIdvSkDD;i z5Y&fQU&5pclYac?vhRBhfn|NAz+KSh7}kFntjv-$(J7;TX$F+xeg^-@!vU?2Wp)S{ z(b%YZMJC=YtgERI=IuP~Lutj52dlV3kq6C?42`9i2#bLlG9%I+1gYN%75|}wgI)%- zu%F0yAvHj2C;>iPV1ThZC{^)O?oTPeer*?$35>p*0Wd?VSuxG(5rueZ)DiGhaeqhi z2J6N(3=n^C5BR){b5Ail8i03f6>(?jQ@c#bKT6|TxxL~JZD1i9XhR{to(r48pHYvE zXHzpMSO4;#KN|)k>Mel!Ez@Kl8oVp)8$%bq0YTr&!#{~*zl*o5BCZoCb4S2(wwhS2 zm*8@_tGc|Mce9?ziaI*)CgI93hGJU~i=A{i4iKw#yRf=zaX8VzZ+iI%!|7~HEHK{y z6AXQihVU60!}n+})yvX(Rnn+)Xo&hJM&A-%kA=xTLD`ucb6^WeU)V2GgE^XJK^DyJ z{U%&ATGO7aM|zWJ4zd&b7nsAQf-5^)yF3R>SHaL3LLbvS73TZZ;MT8dvRILD3#TAA zoY21z9)hnDTWRgG9IIGTqpCLy>e@D2vex6)`A-WH`pQW`#npyzaV~HsWnrbYi?!e^ zrGijKvg)qo2}0(YyG z!;}|ThMX9EE76^fyGLB0P|+>yG~b5Jy8|vNw1Q@ZheBHpg+uwqX4n_5Xs=Yl;1>TV@EOPTH`)&Ffq4vDhm5uivBkCjuxl1lLdd)r_d~c2m-{bQ-_Fdy_fpMYt9#;2u9# z<+VEH@ZcY+whLjci*O_vtJCo$)6Jj|=&i_cuJ*;q#=} zW9Pr5*Iy0XT2J=mb8ynH4KH1VH9NzD#PxdvkMLYy90$&0r^j!9*;m7B&Ujxx*S#Ls zKH@O5i$58zLB}1sYt|%~>QkaWarn+uoAk?O4Lt9N zcl1+DdoiF-MoIb1rzdl%Anl%VS7;hhM+QZCG z{Qd77kOCfe8Yj+A-TCsEq2%%7wC@)>@c7dmPq`oA>UF>4WaDJVhfkb1fBts0R_ zH-3j#qn1TRiWQ9;NTh#Z-CJOQ|(ox;KiG}!U;WC&C7cP}5AVW*x2 z^<|$N_|=YcoGsvG_g#VbL%#Hpfq?;}`P(BFRvx{?8X+s! z1Ms_-zy0>i7iyTnJ^)|`u7|=>t1BEw+&>bo_=JVo+sq(d8Vi2o@18wFKSDJ^A1xLj z6}vr%d+@0wuQH((n{h$GhBp|08FaJGs4rD^WBJvKH99PJ;wM;+Z3iwxw%}xVEv_j3 zi<*Awx44@YH3Zm_U#z}HJY(LFA8T|j0LJwrfnChcw^CrJR*##?fv1YIO177JBfI^P ztp}xIF&YuA3iV@-U;dA%K&6MTLEUFt_q*n?QNtD-Z*z|GMfY3iN1eUadFuYH9XQe^fi@i zVXeAxHgMCU$ zD0O4<24B!*31S}u-id+7eXk8#(Vf*-z!>dpX<@N?58_NrPY#^RFa+!T^Z=KMH_x9K zxS=d7((iwb!hR((IWRpvaDi5jS?sm}Lnfw7`i;lY4BGW{nGcI40L_1KU|`aD$dmQE zk5q97zV3ToHHVbFecgQ?Qvh#GobOdDaqO%*B(wUPECDX4{Wnd|M1F8D<-;N#@}MkCFXdj-02{G*m1dFfl)+pDLX~TeKN8Ci%y|}oThUDAI&MijVZ_Xb9 zTlo6X9!8W zo8wy9Vj?-s4PoL4&(0@)IK~EQ-gIg)?E%G4raK?iXn~U5p1pXHFsE-ADb591ls)lm zCYAPrTfny`vz_*1e6XFpI59Cf30dz&BXhU_kg~_Cn=Z4HY^QN);xS8<(>HER8u`Lx zyy|e{qO99>u+y}_o4(kPKALE=8Cgv02A%2x^`_1>(59=jSy}Yz3y04Ug8`UZ_Hq&P*B# zo|sGwe=}T7%I6^*51gC6XiLS%!aFaeJ!o92S4PsABhxs#oSwAJ|D(TugtNM{w)u3% zW?RJh(n!GT-~T;Q`^LrUwL6$gEdo|18;>JyDq%~&b}AcZLKTN@(XNlGl{y#V!e1l( z{r`vus|5&8VCOpm4j$Lxq+9ADyfgwY!+;BaA#RZ7py|F*eoIxS=biK$q>P87L>Mk< zm)A}B-Hr0WKE!rcLIVmafiSLs=M3Pw-D$u*>7nO(gz-*9zudmZMQ_V%9WsU#SYh=6 z=ZMFXMkUjYVlMru37wxUMKPYp%Id4(Z?LP45w6#--`RKI^@cr&g_l>cqT>o7XWSRZ z7p$8&NDjE(uvcN_RjRL^L|iZ7;&`V6<%eAf)pJC@NL2kH;8~mhj zZ+!x|9$#?r?M8UY8r+ z?SRzbUT(r>p0rQ(+j&N=G zoQC1+_%PnoeA<(R^yeC0&d6de8;>Wh4EuIIW*cxvY~S8O+{;(`br~JKmcV4~3Tgk^ z^=sG9Tpu0wbu^_wTqGR1GD>o}-y~AVLiDB6xp~s+$jHda zVHBI~+u?6UZh^|LG1|-5z*fFFc^w)cHSg2H+LQ9!p1CoW$`J2`oB zba*kBiwA=~zpGP~K{|C6K6&s>&eaMsOE~#DsAy2y>=_X#IGSkcmOr31>*2tM#iR&4+eoGxbT-fV9=-)bRE*48Tp3Y@6 zZ?D4tXEMX&XsdD(`qs6}*ZXf}I4g&xACm3BdJOiP==3b*fG46aZ7?J;+CTbDzwICR z#9*#h`nd`h-B;Is<+A6(5-i?A8LX9fE-^Zq7|zfwtL&l8{O2m1yLnhx>v8e1o+172 z$w=dm)MOL3EO;(A-t?*eT7?tuqU%VIV7T;CA7kvEakE7DLK3%e=OR4_nVu=ye%3># zHPZY?FU{g(=8QcIZwjS57Z3V&znz5LF3uJ-xLoX(PhHsE%IFNd`kjlNL`9)HI@$A+ z?e!w_r +Helper for dealing with MS-COCO annotations. ## Overview The MS COCO annotation format along with the pycocotools library is quite popular among the computer vision community. Yet I for one found it difficult to play around with the annotations. Deleting a specific category, combining multiple mini datasets to generate a larger dataset, viewing distribution of classes in the annotation file are things I would like to do without writing a separate script for each. The COCO Assistant is designed (or being designed) to assist with this problem. **Please note that currently, the Assistant can only help out with object detection datasets**. Any contributions and/or suggestions are welcome. @@ -10,7 +10,7 @@ The MS COCO annotation format along with the pycocotools library is quite popula ### Requirements Your data directory should look as follows: -```shell script +```markdown Example: . ├── images @@ -32,7 +32,7 @@ Example: `pip install coco-assistant` ### 2. Installation: From Source -```shell script +```markdown # Clone the repository git clone https://github.com/ashnair1/COCO-Assistant.git # Build and install the library @@ -43,7 +43,7 @@ make Usage is similar to how you would use `pycocotools` -```shell script +```markdown from coco_assistant import COCO_Assistant # Specify image and annotation directories @@ -59,7 +59,7 @@ cas = COCO_Assistant(img_dir, ann_dir) The `merge` function allows you to merge multiple datasets. -```shell script +```markdown In[1]: cas = COCO_Assistant(img_dir, ann_dir) loading annotations into memory... Done (t=0.09s) @@ -83,7 +83,7 @@ The merged dataset (images and annotation) can be found in `./results/combinatio Removes a specific category from an annotation file. -```shell script +```markdown In[1]: cas = COCO_Assistant(img_dir, ann_dir) loading annotations into memory... Done (t=0.09s) @@ -125,7 +125,7 @@ The modified annotation can be found in `./results/removal` Couldn't `pycocotools` visualise annotations (via [showAnns](https://github.com/cocodataset/cocoapi/blob/636becdc73d54283b3aac6d4ec363cffbb6f9b20/PythonAPI/pycocotools/coco.py#L233)) as well? Sure it could, but I required a way to freely view all the annotations of a particular dataset so here we are. -```shell script +```markdown In[1]: cas.visualise() Choose directory: ['tiny', 'tiny2'] @@ -142,6 +142,3 @@ The `cas.get_segmasks()` function allows you to create segmentation masks from y | **SpaceNet** | SpaceNet | SpaceNet_mask | | **iSAID** | iSAID | iSAID_mask | -### Todo -1. Converter for converting COCO annotations to YOLO format. -2. Write tests for untested functions :) diff --git a/coco_assistant/VERSION b/coco_assistant/VERSION index 0c62199..0d91a54 100644 --- a/coco_assistant/VERSION +++ b/coco_assistant/VERSION @@ -1 +1 @@ -0.2.1 +0.3.0 diff --git a/coco_assistant/coco_assistant.py b/coco_assistant/coco_assistant.py index b3fae78..b588d16 100755 --- a/coco_assistant/coco_assistant.py +++ b/coco_assistant/coco_assistant.py @@ -8,11 +8,9 @@ from tqdm import tqdm -from . import coco_converters as converter from . import coco_stats as stats from . import coco_visualiser as cocovis -from coco_assistant.utils import anchors -from coco_assistant.utils import det2seg +from coco_assistant.utils import anchors, det2seg logging.basicConfig(level=logging.ERROR) logging.getLogger().setLevel(logging.WARNING) @@ -292,25 +290,6 @@ def get_segmasks(self): output_dir = os.path.join(self.res_dir, 'segmasks', name) det2seg.det2seg(ann, output_dir) - def converter(self, to="TFRecord"): - """ - Function for converting annotations to other formats - - :param to: Format to which annotations are to be converted - """ - print("Choose directory:") - print(self.imgfolders) - - dir_choice = input() - - if dir_choice.lower() not in [item.lower() for item in self.imgfolders]: - raise AssertionError("Choice not in images folder") - ind = self.imgfolders.index(dir_choice.lower()) - ann = self.annfiles[ind] - img_dir = os.path.join(self.img_dir, dir_choice) - - converter.convert(ann, img_dir, _format=to) - def visualise(self): """ Function for visualising annotations. diff --git a/coco_assistant/coco_converters.py b/coco_assistant/coco_converters.py deleted file mode 100755 index 342d722..0000000 --- a/coco_assistant/coco_converters.py +++ /dev/null @@ -1,138 +0,0 @@ -import logging -import os -from random import shuffle - -from PIL import Image - -from pycocotools.coco import COCO - -import tensorflow as tf - -from .utils import dataset_util - -logging.basicConfig(level=logging.WARNING) - -# flags = tf.app.flags -# flags.DEFINE_string('data_dir', '', 'Root directory to raw Microsoft COCO dataset.') -# flags.DEFINE_string('set', 'train', 'Convert training set or validation set') -# flags.DEFINE_string('output_filepath', '', 'Path to output TFRecord') -# flags.DEFINE_bool('shuffle_imgs',True,'whether to shuffle images of coco') -# FLAGS = flags.FLAGS - - -def load_coco_detection_dataset(imgs_dir, annotations, shuffle_img=True): - """Load data from dataset by pycocotools. This tools can be download from "http://mscoco.org/dataset/#download" - Args: - imgs_dir: directories of coco images - annotations_filepath: file path of coco annotations file - shuffle_img: wheter to shuffle images order - Return: - coco_data: list of dictionary format information of each image - """ - coco = annotations - img_ids = coco.getImgIds() # totally 82783 images - cat_ids = coco.getCatIds() #totally 90 catagories, however, the number of categories is not continuous, \ - # [0,12,26,29,30,45,66,68,69,71,83] are missing, this is the problem of coco dataset. - - if shuffle_img: - shuffle(img_ids) - - coco_data = [] - - nb_imgs = len(img_ids) - for index, img_id in enumerate(img_ids): - if index % 100 == 0: - print("Readling images: %d / %d " % (index, nb_imgs)) - img_info = {} - bboxes = [] - labels = [] - - img_detail = coco.loadImgs(img_id)[0] - try: - pic_height = img_detail['height'] - pic_width = img_detail['width'] - except KeyError: - logging.warning("Image dimension is missing from the image field." - " Proceeding to read it manually") - im = Image.open(os.path.join(imgs_dir, img_detail['file_name'])) - pic_height = im.size[1] - pic_width = im.size[0] - - ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids) - anns = coco.loadAnns(ann_ids) - for ann in anns: - bboxes_data = ann['bbox'] - # the format of coco bounding boxes are [Xmin, Ymin, width, height] - bboxes_data = [bboxes_data[0] / float(pic_width), bboxes_data[1] / float(pic_height), - bboxes_data[2] / float(pic_width), bboxes_data[3] / float(pic_height)] - bboxes.append(bboxes_data) - labels.append(ann['category_id']) - - img_path = os.path.join(imgs_dir, img_detail['file_name']) - img_bytes = tf.gfile.GFile(img_path, 'rb').read() - - img_info['pixel_data'] = img_bytes - img_info['height'] = pic_height - img_info['width'] = pic_width - img_info['bboxes'] = bboxes - img_info['labels'] = labels - - coco_data.append(img_info) - - return coco_data - - -def dict_to_coco_example(img_data): - """Convert python dictionary formath data of one image to tf.Example proto. - Args: - img_data: infomation of one image, inclue bounding box, labels of bounding box,\ - height, width, encoded pixel data. - Returns: - example: The converted tf.Example - """ - bboxes = img_data['bboxes'] - xmin, xmax, ymin, ymax = [], [], [], [] - for bbox in bboxes: - xmin.append(bbox[0]) - xmax.append(bbox[0] + bbox[2]) - ymin.append(bbox[1]) - ymax.append(bbox[1] + bbox[3]) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': dataset_util.int64_feature(img_data['height']), - 'image/width': dataset_util.int64_feature(img_data['width']), - 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), - 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), - 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), - 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), - 'image/object/class/label': dataset_util.int64_list_feature(img_data['labels']), - 'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']), - 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf-8')), - })) - return example - - -def convert(ann, img_dir, _format): - - dst = os.path.join(os.path.dirname(os.path.dirname(img_dir)), - 'annotations', - os.path.basename(img_dir) + ".tfrecord") - - if _format == "TFRecord": - # load total coco data - coco_data = load_coco_detection_dataset(img_dir, ann, shuffle_img=True) - total_imgs = len(coco_data) - # write coco data to tf record - with tf.python_io.TFRecordWriter(dst) as tfrecord_writer: - for index, img_data in enumerate(coco_data): - if index % 100 == 0: - print("Converting images: %d / %d" % (index, total_imgs)) - example = dict_to_coco_example(img_data) - tfrecord_writer.write(example.SerializeToString()) - - -if __name__ == "__main__": - _format = "TFRecord" - ann = COCO("/home/ashwin/COCO-Assistant/data/annotations/coco.json") - img_dir = "/home/ashwin/COCO-Assistant/data/images/coco" - convert(ann, img_dir, _format) diff --git a/requirements.txt b/requirements.txt index 3c21069..ee0164e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,6 @@ Pillow>=6.2.2 git+https://github.com/ashnair1/cocoapi.git#egg=pycocotools&subdirectory=PythonAPI seaborn scikit-image -tensorflow==1.12.0 tqdm # packaging