From 8fee036dd297dd0928223fb8c76e8e81c6070dbc Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Mon, 14 Sep 2020 18:16:32 +0100 Subject: [PATCH] PR #1 tests added --- tests/data/test_protein_seq.docx | Bin 0 -> 5378 bytes tests/test_basics.py | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/data/test_protein_seq.docx diff --git a/tests/data/test_protein_seq.docx b/tests/data/test_protein_seq.docx new file mode 100644 index 0000000000000000000000000000000000000000..dd6db5068b0de26b7e1af604a380405d7124308a GIT binary patch literal 5378 zcmaJ_by(DE)26$myFp4qQX1(HDQOU4=~$$eSfoQ(P+}>irMo+%I|XSFK|pe)OTNYT zJ4cS+_nz77+Uxhn-urvzo|$Ln9t~Aw6k-Gn3=D)9LEM@McMKc;{M^OT!Ig*m_PZqZ zi7K)TPU!wMiR|8@17~>O$O+?PAmcH5aPrRH*a$kmZOP4&3|6B?L`HA3*Wm&6gyYE1 z4E6VskusfywlXhy#~XR=Jm)2N=*%X08ucXKeo{#`<|teW&U!(p{3g6xZKpth?`8fw z2CJu9z2fGKRe3Ql7i&+G{2pec^G!|L^%*rG)m$oF2@83XRx$^(TAAh=msN>ylc;+E z#?7+53SOSu2|#f?O>C}xGDUDk4zvXnM`cRAUJlNdpFmAdU zIh@rwOLdg4p^A>NIflqI3-?z72?0UlzcNAq{{;YOq2U5_cIA2D?9A=y=#UyS?9@q5 z5WMe;g}qKG8#`UYsP>8On7AP3yHJZfv5*iH7eh$%#|cZ++Qq^=PSph(X{+e5iAOL& zR250`Is{+_o$L`J@hW?s%hJ!NQe?TorL%?;Q{08SjIzOy5s<_Z7)4@x?>crO-*D4% zNZUcX5|o*kVk#xti_tRDc zk6Pm}0w<*m#NtgHy$KEs*!o7}77^5}LH zqMXSQ1_wPJ&>v-egFd;n6*$Qk4Fn)-4vDm2wYtkF^0jRo6`pBx2I(Zdl3Rd6CS&L< zje>UHC2}xe2~j_-@D7Q1<=Lv zmgXoelcycL#J;C`T)xY5xH7i2F&Q7pl-0Uyz(tFeI(4PC*g<~4x{82-AmHhOtM{JX z*V$ICW*QqNj^qqp`ab0TBDFqo8J?mLzesV5&g8*oA8ImGU12hV@7Yyf;A3&C__%27N0A4+o1h=(LmPj}Ue$uZ zPKw7N4!YflIk1X*1A@HR$s0H}dZpH;ilEaXR23$|d+BP|2i_#=n@MA^T_1>F3lOKN ze!{jh%|DW#*BIM!5m)SoV#k-je(*MFTHq1gw-SJoU>;Xeg(~Q%#^2$UGcB2mui`r2 z>1d3TjX);ATSyByraN6uG`y@TJE2SCJXt&Y3XD{@sD|uZK~n3_y%^>v3An($4U+u) z9-9?BNDKcOBx-n&KtLA&&wp+RcvZMNS~|JihU!1JMa&bd4qk$g(=^7RrQBx(F}Y51 z&YZ^xe5th`g)sZsynT=(4B1vY)~|f``FNf`zl=5+LwAhw=TZ*H)U2Jzz30qUH_1{Q zsodJDu4Rl_;t|kIMTAl<=hW?`@!;ek^K|CxxVZ~cEiT{ARGs^?^yxWUftOFwh>=<6 zpCMh94){Zb!`jLnP3%Lcibf#F7v>SxX>RnU*zat6%V!{gO<97Z$jE zMJ=2~it$Zc`|{`DjMhr&XYx%|);*^S4QV4Fg(@@&c58k~T9)^jP;Nbscd+B{u!{tK zEHl@!5%Zo?C9q^uO0De|2YS+UBb9kbB7;r=0kf)rG?QLtR7-~!GK1;0ZuQ6p6h!zs zt8v^smRY#Nzp^;!t-VI$rv2YFU@uhGoYOrWMqG_f_l3(C4dP>W$%XEfS z>D5zsC>&A$EgXL*=G_Y2eQ)HNEkx|Ue94(lcacz&nwv@k`brZTX;zO@xBi38V%7>d z9P5$kX@Ot1Us>CS4stQ|$^s&>ME5?mK;Pi#Xi1t>CX}_w4WPk*36mD9NZZbl)R8Fl zfQ7jFk1flM$ouKOJI*X2=j6E}iixQ#9AfEVQoe>bJtAq=Yz}%Gp+eX&^#Y1n^#M^O$+u}S}WM{ZF+o$O0FQS7dr$hqp@GA=C zekbE1$ircdN;R$-%j(wf{cJk_XZH);MXM75U+4`0CY|~%sa1TpYcD+NV_X=`8Xe}?&ejbi_LLt^Cl4qGy0f}3y%+5HX9A^%F#>p?q0|G3TGnD!i>)F^JTvr@Vj? zuUlUwzPmc58p%7DOCP5CGA63GZ44Z0FnrrYZ$O)0`JE%Pf=ib`htVZUwDWoH5&oVB z?H3P1#ldJiGR)MMZ!NI5*%Lc)YWz=La+fZbN}u#fs^(V3Ur*%H2Wi(P1v_GS>8kiV z%9Ypf<&ztqkzL*tQxWV%H4Q<2#lAJ7tT^_S-_^+U&Fa*J-uCm+e${L}u(O;MZjNx< z1XV;9vr03u4Q09doNXFX?}6M(e_<}tjc<9uiQ}a2mQpiKlH|Up42s&|^b&fOV*Q&E z#ZvNC^*uYKX5D3y-qRzCV$L;%3>*~ccvM#6?kIxYU}%XAwkBFs_$O8_+d)}p1Lz-xmT$p6DL;R5%29a zW4|Hmj7Em>mX{^Gn->e_shqX1uiRHQUo#rvbR213-=aPsEoVZ{7Y(Og8mDueJ+KrQ z-FnaO91YZHWS3ykss+|(acl>y+coppl$LF0a-yZ!ZJN{1l-MtNJfJ^M*_)+b4KT}Z z^qvTgrmI&|oB#;BIS%X12Gm*h;?gDi%v-z6K`tla4ST_n(FOOJ&2jfyA&mECJ!b?V%LDSuUpFE(5OCSHgWgrkIm|~dgc+p zxLpcFP02pPDtna05E8*dQMs#7NOCU9i^3ltkk4LgrIcbOl0XAfVIIirER0lyr1&)T zo%R5tP7sr%6x_f81#@`1V*0yg5{G6`w%l!kRxTlAXg2VZd1RuY37~>wX z_0A!Xj>mf#6RmjX>3DEu;LkF$sYf3Su^ih*2&nad9%bLPjU7~3$tR!mV`xWIIeu|d z{~WTNJLL6kDp&ns?kmVD;H%D53FJHXxCDfKJ2r`6dU2p)dYv7n;0`12^=Gyy{iacn z+U}*oi9NxrsQdV9;b;pq3Wh^i*D627l+Sf@xyp&p(P4S{%=LsB; zC#{gjqH?&1dV{p^pr*V!Y5f~^`k~BIqI4In5n4miyRWGmUb8&5${5xfxz_3vNo0JS zGju|6h#$DpH|p;);ZWr_Gs~78L#)E=%|n>X6C13sS^IG7GN5TtzeKy5tj4_@ zhK4Nucq^ZeEmZLW46N~KFKZZrGmet82x1GyI+_qmn_+pQOT#k?OqP{$Dm9=0{Pq`k zv4qRCK5t^*q0{<@tYrj#n$&-g*@5y0z}Yk=$5^LHoV*MSait>^?2Zp({QYmH;3l;`ik~*5#wmi zIoOn9B2V7RAz#RIsqu;=ZFxt-@H%>GI>=hDl~uf_7te^~!pits>>OBNHDjCi_;SvG zDlYZa(&lkPg8ucr3Gyb)u~Oi1z^;`X-7$?=X$oxW>uKalmfw|7%m&OS0CbR@kLFC| zK2RGw=Y;3IKz%g0yihRCb6XR*SG6Ud;fg>Kz9jkYvoI07CX8g^b1+;PJ=gJawsbYQ z)del_PoDuF!=6Y zFR5X6dkY`+lZxBft9YcXb0IR7OyoY8mGymRTH#*9@x29EP079-V1lui;u=qbgT%kx zKBjDOO^?)aks!v9E{}c1ILgyd&FYV78%|Nov4^=)69`}e*ApSfL!`VnW2|BQvF|{# zfCM#>&-Uy{$6t-ICC4XT6Q^dQiR~UhP#}I!v~-=C-TT-@A{*nJYi+Ah20SVHVn0T| z$I|)EMhG5rrZzIf&+EjmQaFw2(%YV@UT6^Lzp)r+qGUk1h(G9k7i}w0i6S?UjfvME zNkSPoy;^Y7(~&4TG#<$!$$bM78@eAfW_2#v(HhoPAH@giD8rKc$x|qN%eJ$}_|^mX zsB4OH%QhksF~aYS)!og_?Z)b#c4vR}C-Ckv;&!p~+X&&W`g6_mC;F~RzMZjunEA{J_wnC^^`HNASC#$qAY5=^@P9PvpYXdX;#Ru-HX-$}na7k^iI{0YBXN4Fj1w{heBb^5>i$)Di6X>nU>zwI&nDF094X{e&Y SokBpsgnz{0VJiRS@&5pIx+rP@ literal 0 HcmV?d00001 diff --git a/tests/test_basics.py b/tests/test_basics.py index 0cefb7e..bd31215 100644 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -6,6 +6,7 @@ example_path = os.path.join("tests", "data", "test_sample.docx") +protein_path = os.path.join("tests", "data", "test_protein_seq.docx") def test_crazydocparser(tmpdir): @@ -33,3 +34,27 @@ def test_crazydocparser(tmpdir): ax.set_title(record.id) filepath = os.path.join(str(tmpdir), "%s.png" % record.id) ax.figure.savefig(filepath, bbox_inches="tight") + + # PROTEIN SEQUENCES: + biopython_records = parser.parse_doc_file(protein_path, is_protein=True) + record_features = [ + [(f.location.start, f.location.end, f.qualifiers) for f in record.features] + for record in biopython_records + ] + + # Test for names with "/": + records_to_genbank(biopython_records, path=str(tmpdir), is_protein=True) + assert os.path.exists(os.path.join(tmpdir, "Protein sequence 2-a.gp")) + # Test truncation of LOCUS names to 20 characters in Genbank file: + with open(os.path.join(tmpdir, "Protein sequence with a long name 1.gp")) as f: + first_line = f.readline() + first_line.split()[1] == "Protein_sequence_wit" + + sketcher = CrazydocSketcher() + for record in biopython_records: + sketch = sketcher.translate_record(record) + ax, _ = sketch.plot() + ax.set_title(record.id) + filename = record.id.replace("/", "-") + ".png" + filepath = os.path.join(str(tmpdir), filename) + ax.figure.savefig(filepath, bbox_inches="tight")