From 52e0c15edd78d2d6f800eb5dd58058d38302bffc Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 16 May 2026 06:03:36 -0700 Subject: [PATCH 1/2] geotiff: golden corpus phase 2.7, overview / COG variants (#1930) Add the three overview/COG fixtures from the Phase 2 plan: * overview_internal_uint16: uint16 with internal overviews at [2, 4] * overview_external_ovr_uint16: uint16 with a sidecar `.tif.ovr` * cog_internal_overview_uint16: Cloud-Optimised GeoTIFF written via GDAL's COG driver Two new generator flags: * `external_overview: true` is now honoured. Phase 1 only validated the schema field. The sidecar is written by re-opening the file in `r+` mode with `TIFF_USE_OVR=YES` so GDAL emits a `.ovr` instead of appending an internal overview IFD. * `cog: true` routes through `rasterio.shutil.copy(driver="COG")` so the output is tiled and IFD-ordered per the COG spec. Smoke tests check overview decimation, sidecar presence, COG layout tags, and run each fixture through the Phase 1 oracle at level 0. Overview-level comparison is still out of scope for the Phase 1 oracle. All three fixtures stay under the 12 KB budget when written with deflate compression. --- .../fixtures/cog_internal_overview_uint16.tif | Bin 0 -> 8876 bytes .../fixtures/overview_external_ovr_uint16.tif | Bin 0 -> 7443 bytes .../overview_external_ovr_uint16.tif.ovr | Bin 0 -> 2540 bytes .../fixtures/overview_internal_uint16.tif | Bin 0 -> 9976 bytes .../geotiff/tests/golden_corpus/generate.py | 95 ++++++- .../geotiff/tests/golden_corpus/manifest.yaml | 61 +++- .../test_golden_corpus_overview_cog_1930.py | 260 ++++++++++++++++++ 7 files changed, 414 insertions(+), 2 deletions(-) create mode 100644 xrspatial/geotiff/tests/golden_corpus/fixtures/cog_internal_overview_uint16.tif create mode 100644 xrspatial/geotiff/tests/golden_corpus/fixtures/overview_external_ovr_uint16.tif create mode 100644 xrspatial/geotiff/tests/golden_corpus/fixtures/overview_external_ovr_uint16.tif.ovr create mode 100644 xrspatial/geotiff/tests/golden_corpus/fixtures/overview_internal_uint16.tif create mode 100644 xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py diff --git a/xrspatial/geotiff/tests/golden_corpus/fixtures/cog_internal_overview_uint16.tif b/xrspatial/geotiff/tests/golden_corpus/fixtures/cog_internal_overview_uint16.tif new file mode 100644 index 0000000000000000000000000000000000000000..a1073eab83e7694cb451736ba74623e627fe24b8 GIT binary patch literal 8876 zcmeHLXIK+kx1NYd?;yQM??pgFj2Iw5kWP>$20@yNh7dr+&>?gbr1u`G6cxJ>1OaP2 zMgm3zREkOq98gfa6V#)}dw<;L!{_^DGB5Map2?oY+H1YDW+4#D01*HHGZUC4$^mKb zXpD5U2dNbt2{VBqVJHWLD;%n+si~!-Ddic4^$udSgt^!_BB2OV69<$L+|ArVHFa1`7GfkfC?L#=ICr2t+4fG_|M z@ITlK0`e2z^o?_a;|$+8Gsw4mQ?<*li zKmO(gI{)1(5e3fqeI@9ez7ljBZZOhU;w#SvUH$V)ya&Dce_Kmmx9`@{`G3pWwe`69 zYHbH-{?FEeW&p12V61QefCLu+B!GrZ^8kPa9{{xT1Ay380EiO=fKefEOavSg1prZT z00@-;fR~a0V7?6ikPr{>i-I^p4uM~ffA;io`Z4{UCySm@ZMC4tAmw(SJUtg9(nF%{SC7EceU3g+@AdY(yT;9x@oSP1D{nds z*G|!bqZtJbm~X@L9`P&3^Y}O)!t))MLJ7JcN!|QGs;xa+!^Hx71VVSun}WgkAAKTc znPkv%(R9!Q99+IfZZ$@B0Py&8gkh#E;_K$M?;n~e>w7Ged-+|<0Q**enVT2|unLXr z-6RWRt%7Yi4C8zagYd&x&cPUwFpj4%1|1mN1sD?+#wBVv7?G!lD13k@KqZRrCh}b;ik>D4e%ia)lS|rMDt42yUs)_mUYt^1f^c5E zejc9DIZnWk(SM6)=dIrPwMp=d6y zIcA$Vc8xjK6LXxBb1bMi4#PRNNX_KWl2UQZp5{{V>=?!9OvU&oim`2qcp4!pnGn}c zh^Z$eyd^{z5aNdju~!NB73MosfP=)GM%rveVt+u|q()+GB5esEaZZyU;v|-05~Dkb z;{}NUN@BY~V#1NQ;H_U6QZ;Y0vo*Hd=2SrQ)}lpx(OW0b5`t)+T(qzgT3`Szu7~F9 zK#PW=1((s1v3a5++ayM{N=B4QM)ePl$Q>Nj9Uf5|9)<21k=Zkx|V z-D_Nm6>-~@2^9&u3d#@#75)X~E(Migu4Qo73UAkPN7u^Wp)#wXih!YV_n}H0tLO2( zc&y1?tWho2bOH;@#li=$#vNF*<;A@jizbg2jm|Hc&Mv}A7vV1#jc+ZQRZ_k{j8ipx zHapzcyvwWFXRrETyXrlR>Lb?1UW>;2%8mU;8Xt-@_PI7b*wuLNRO6$)_ck$Ga3dD4a6 z#KzvF>xzjLib-vR#Ck%~Rlmdu?tQJiM_YN^ zTKU2Qxy=H3eFJ%%0{KGcxozfoPt5b6=K1X2f7vUIkj*ku_OHrf$v>x*pCg=~t)HLE z=yFcZB}c#|Th}F*WAL2XV2=1;Hgqr-Oa_di8AeqBqwI@O7sMzzVbt_6Dxnw+E}EhZ zO;v-Ye1fJfNmD}6)C_4Vk=wy!P^?Pyv{0;0#VD6&Dpx;Iu544TrPY@u*H`t|SJchx|f4&o<=sYB3nKn8>x{kn#iUBWb0`%Oq`4;Cd1vyRxilL zQ1ZSTWHTJuhMo**IwVWBrVAu%1+0B7*3}oge*$|@5Nnrq*wCgbi=_my45De1Q3)16S(9u}fA-SMy$Ok5aYTKRd zDvfPDy9!$ng>C+YtuBS_VQwvOw>EFLR!6t?;O8w?&)Wi?x4J)X$FX@H_rQl7z8m6M z8{#t&aws>%dmzNCBgA)k$s=Rw@S`Qq^GiOnONUCAyk9PP-CFVmlOftTedleJiR-(l2eS(_&kJG3WE9%n+RH_>F zNfUJ_fck8jIxJ2l6;lV@spJ>b7f|Y7H>l5X)Pc_jz+{-~<$H8}u3vQabf*HYzZTcy zi+ebMyDf<8%f)p&;T{a&y7X}OI&gPGagUaviC3Z}SI$bVCY7$_m#)U6R4*hapr0b|Jp+;kw*;$l9_#qi93eJSd2yXtdWTD;j_jPjQn;+!U9M;V{r~0l98x5mSil+ zz^>IP#Am71CB~-O+9@J>thGymNi48aSnxn#mpIq%`A$*E)APnr+&p{jb-YrwZ_7GZ zYImsy-ESvK|58_kzYijAGZPEyRPrqfx-I8)AgD_%l)6BavDv-QsdD1_!fgfA>4h$h zNOqy^(k6)gmVt8TkQV;l=NzrW1dvFCwa^a$i93Q%4&dht5-TFDo6gWsus1 zrxr&8E|9Im#j#HjzV2AERVZ}v>An-VMY2t_5j@6!FkOEpF3VbfI<@_w{%lN&`jz*| z4b4|(5(Z_&A+$ zYfT?L*&A6E;}4<3|-|OFMrE??{s1 z@Q0BU$u*%LN$QcQRFi)wM}OSKyMt(et2ah zQ>FRJXpX^5*vMIhlCaTiz2T1|SsFJ#j^-M!DV3EOv8q%MJd;(*tL!aQDjPBV4P_O- z)NQBU9xW?36**c_=TmUByn4Uu(aNTfp?77K*1O(SG#tPBuDtf(sdtsxId_i5X{%ozOE8c)ITo+C=ffD@aEs94#6zY+-l>7|d54qzee%51!?rpf zPV(OG?41_8gK{_}AcW$bf#clyI?>B|=bO|Z^~7LCOur@f9-lnI2){(3@v!P_H*x}2f6goct85kJD& zo&vx6{Bc*my6k5|ehtM-yCw-)53f$v7tWlTtjir;Z_Gb4n8nSJ6iRXP zWWAT4=l;^T^?824@C(C$eLjQ^$M0^2Y=VHYF?ZHYNmRw#tNQ&lx4LD|M44z=z(~{H zTqaf((_0xOZJ}5hqkM!=87=SXR~f5*YSJreo2igjjFL~DSG3%IXRlbb5X!448S9;| zVpNV_eif~7@Z_sljRWf@(`I z@4d#Bevx;_TKcv=f8TOXVpR;?%X7B~-7h?G0Np1rKtyMNfa~g~hywVLus~oHiUNr_iyf#wt z%WHJ_O!!E-nMC+loo`9_XtmS9@K;Tt!>c2eHhWgb8cy6?9j!%0tiC!QxyE#{83&=` zS4UT*r4(P_7gg&v-PcO$eDG;ol#~&(OpM}sKyN&nKX^sK5`=u>?0STJV-PiYJ_#=4 zdA`x`9A}?+$7jyIu~wHTK6v*f%0$#ZbzscC=F8&=ZZnrBqRmQ9j>kI*&SzdqS$${Fup{L$= zSzZrdP@8{`EU#-asLdOC;C+?ll|V=ph+ouVb}ZP!X2}b28QRqyB9Ef zoeeZFJpbLj;0OB|8)#r8|GRsU;6J#b21fOcUtF}1A}qTj~Bi_4UFH=z_=tI z5253`C^OJsN*Et+k`y?;wBLV+@&nWAK>~XA^PgIIgSGPAt=SMd{#QWnzw+MoYZ-U! QPp4?(qDZ2}4%cD>#u+h6%$_(I7G_t05ppY^^APPoPN{5(KIV2wWK= zCftCKI+6<%_;jhP@F7@b2qYxMh#^2*l)xwOOZ)5h@$G-#`^d?8-E+^m=W*^o@8|L3 zZfaU;YHCJmYU&zl>Z%m1YU=;UJO0jws&S3K^Pej;|IUA|F;d_8*VkPQX9%*?(kx>K-)>)w(HFqZIB@(^U1psaj`=hMKLaN2>PXl)R65Q#H-M?knuIwKyezKG*%LS5v!j>1wrA%auRh7tD-` zJsJ~!*4F3HztnQ>>eW&yX0e7l8_OrSm!|*sM}K;KIWU#k)28;dn))8CrmxlYcYYJB zuD6HtZ*_xt^17}1F)bMRCtK}fI=9IZ+g-=BBgrpq^^WNZ$bZ;sywEy89=6qeq4S6= zv)%nd`wIDut-%Z3-^l-MQghMLA^*^%<)T9)&o}LK(LO;IH0^QG9VGwOw4+tajXc<- z)2hQJFE(kmYG;zCn)F+B7s(q<>KR%V^c=aOfe45Ydn_sLpjI>lsB@=i1DF!EUP9y8r%OhK44U9Q!ji&yRq8DrYit;qYDi-mIx|X6bVdzaMPcfb_@urTX=uW`6 zl!qyn6L2ndDP{MB(KSka3Sz?a8g(YcV8ZwVr8mW90`Y;W6sh?b>Qks9Lmv};>W?BV zA6NzDzR1i6UO^R$cKR4aQ0O8PAJYixxM+`$F`x2SwATm0r~WS50U4qwWg-}4f}#$I zbRbvVsjo$PImWXTp2#W(F-!d@(pWbPq*RKG*G&Sc zqaxjPSRbWRWVsISqb`efuN&o48bye8(|qcj$Y9-gi_$N$Sx0P9)%O`{exGc%1CwI5 zr^ILYCGIQTbSc+VXS|~tYUXC3?0#>SQx{^5i@|* zVZa`sL97J>{s8k1tI04*L)T*A4AV5sBvzkc{42T(Yt2CXicw(IqYO>aMOdRK6I0C3 zSnViS75V|zJPKZg`4zh>%IF-r3Tql=dJgjoRxir<7xYuCRTSbE%oRDYz+3q#1k_#rt<{$5ZyLr@dW-5^L|Y8iP0rAFb02OdI>W3PiSF}vO=__(JT|rHu=hl2 zNkB!3SiIB6E~3y_?BL@XQA`(`_&7!sofRMQ@r)=L7w_@0=NBFnd-%BXiyw>k`Z)88 zz7l`pu@mH)RSbyXkYiR+g4i4K%qp1{>qGYPLVqy|a+eqPh^--K zc~PD?5b~0jY>L%$Y^@7-i|uk;tcy#;MmY}FMd9N8IUd#}&&Ap~$c92^v1^W7LorKi zp5xR|lrHwoK{b@T74OQiODsf)9dle0i|>d{a~u*9bMui281VvTj%z(NDD{kltFai!RJ-662(wD{n#q65jbg;QV}4PR_{h3ve#xBJVBLPJ zFi7mR?!HyrFSc2C-YO~(2d#T;m1ulntN+9IK08b1_wB8)|H7f-hcO?`=kw4u)dm)_ z&#n*_=3^8B3q7>|Cs_C=rQ!4Roep-#Ltu0V2iN0ax9KJhj>khI>4zLVkB1BBdmQWs zLJrV99NY)O9?|zYI1hwgp?~7wH4y$AeMgI}YlsdV+2Z0FMx(=899%Mh#TFl#SIK&(=A$@z|c&(UkeHhU!-fc*j)&*pgXm=UI?qB!&@9LgvQgoTRbm> zPto;T?B9j>(NQh#@4~w2)-BHOLUZYXEne@!H|Xj(Tk{Z2x*g8NJgk^*gmW+t4WsYJ zd6=hw_bT5g!BCL;YBXL%Q=F@{DUW)J?$85Ec-#Zi*-rfg`Kpt}_ zMUFWX|8yJLzG%~OBlU#KgPT{}T3cwDnXL~haxE&Kv9vSLbvK-t$1@`9ZaT4CGfvde zn9K_q@pTnUmU%{4T`8U!ml0i8j%WF1MAVfnFo_w7b(IUOT^UE?Z@`(M8QA!naF%1n z@pxJ_Gc6-7zM`6Cnh_db8qGYHaW1|*nsqqiRD9VhW_HH;_{vu-y$sCM4R7YDjI&cW zy;+_aC#Ps!=B12;sR}O3DkFTV^cwSgM$A8Y|0Oj1VDROJVjhBVmkhCUM` zJ>z#%pJgvS=0~ewelCsmtEgZZOGErhBbaBUQGVqStb@`|{mS^vucV*(Rq|Q7QgrtX z6!WAsvil~A363^ms0ABqUMAi}Msob(z=5^`$+{#&& zffTcGBanGodUoSxAj?a7a)Z{#yev)FsOV$aNW(Wu^O;G~n2qv$R*>}cM%fm#K$^5s zsrjE^ixBDsExiXRhwg^`=a@z4{R?}_AAGreC;x@vk2m)Xnu$N%H()Y8yl-gl?;h4= zqcBdPhtbL5+ng2;^OO8Y4#UIruaVMs+xpc0>1J8mGD4%$ds2hjY4r?1`bJ*<`ZTw75T|2yO zWRcU|ZrwI&!6|Mxx;R|NVYZuJ9 zQ%isYBMY1^f;BJ-=M)i)5{9ce4+!Q7{Af-U!8Bpy73V3zDq+-{Q%*3R9_Df$5-g|r z*EsbA#PrAqPA|b`dQ_i7l^OaES8(pj%>4Ng9JXU_HYfoHm(7 z4?l|o$lyIAa!!xTx@XjyQzA3U8*bpRWafGNM9v+VY2L^zhbObj8x7=C%8WOM`#7C4 z%T0bhr%{I39NFUZ%WQO$$sRX$JvM|7{t)X^mVC>D7W&u>J~Y48_^)s5D||(agZwVx zZTF5YQMJFE_=$C+vrwCeY^ZS-9VZ@WxXTi{65Sf=SfUF=zlMf%p*hj1p*CFv*C|ME}`_>q0A{^K9*P z(Rt#L+4_&d0HW6{@KHn}2F*4a2sIS8fi(sqjACEl-AbXo!X>b-QuMjPC$QnP&{*LR zSbJJ@R&gk>epGl+;SmUoioQ~O64>Y^)Kwt+YP>`z6$kq6b_(4UZhdu~qKgW@zJ|*} zONCQk?PXDd!n?12S?I4o^#RMGJVjt%qm6L4!Y;qYMij2tpMSSe=&W$fuWJ;gD}3`C zl7t9_V}5OtC`NHOzkW`5MB$ka%!#fm0`eP!ga!)xt(qXwX~n^3%e_{&yc(q_P^2oh3t2+)uExAIW9;VEwvePu{%MFY<}Wm zKS;aM{9UW98|_5%qgEuFmf76hYL`iiZ|1hzFVb?G`!Z}TXkpFYW+3Zm_~xz*yLei3 z^V1CbDcZH>Ua74gEu#6c6xmJ7YVMKR<AqwJD2)2*8UYOJGFPh)|+-J_3;FfOS_cXGhug)c0QFiVgG?fO6~Wt)u&^G{SnqkT31ol0Z0lk<;M@S^i_#v&v- zgyv%8nHti5W7FniaH&IeNaTRS9rOvXr8+GBUCW(S+`nprofxIKR8>i@8o*ttMKc*7 zE>l(6tKQ*qYmed?wK%-0z*kM;uGL}|7+tt5Rh_R=;PT^w;fx|&imKdK{fzrE4qeT7 zfV-fo`c=Q;h;c`w8CAG6Rq?O-1(zL%dBu2&yQDHe)f$d88SKp{$9=A%K-CEDt4TDM z@ep@WC4#CYT;Al-Ym9nax(WtWGq~%Mm=BCz++~#ys+70_-(YUBM_unIu(3h^isUl`eD# zqfL^jl0?;eNp9ECECwLKt3XjTExFc(ku!QESt?glZA$Wq!PbltNs5XWRnH|~644C| zmgIs;8C7p3MB>pz#vMtT3L90gCD}yGEQ2Sxq%udW;NY%0=PjNJ#(I`n*L8NLR{^uaM-PEko^dhxs?opSHL%Jm4j`+f&XQH?bfOT{=mNB#-@Su>_Rv2 z1SnwNbZZ*~e`8bFt#05To5p6d!9{j48_Wc!*cI%yMR0>n&TO>+pRr3b*>&IoyC@Tk z2Vb$vGux)X4{YjUs~^Z`mo2ipK{>l*5zGZ=*_DfJ8{k&v*A}gs;P;s~EZD`MD6`N4 z3>_N7T1Aza5A%^u5AHSWRl}s;o#4irSa@)@Yl?u zcrY6LC9^!f?G?C|Nu6r-21hc>rr2C?DYIk>yavu>R!+5j0F{eh`?czWKQ7+zV^@IU z#X>(Y0vunw>DR^we_y0@x1zwIMOrtz1AM<&+zn=d(~A||ZE|pPk(}FV4L)Bi&1E-$ zZx@Sl!9?)&VtH=cEckJey3ra4jxLsMu=~K}#gYv$ADmmP+-TbZ)l6F{Q5+rYJ-1Jb z+O$l;=BOt$?6+?6f|t671+m7b4v&AP>-kg=Z+YqIc;fT%)$AShXPgD4*|7R3=kbAT zo%%?Y0L-?ik7kX(%hs&NrVDDb;q~Xz$0xJ(>(9OwbY)xD$Gja^WUHS)gAf#D8=a3r zjQ^aieLnJz;6b+e`RF_2zh>_`kBt#jWt*Nq7c>4#w%+-(uLV!Dty7dnA6nDtjqDV(ty%O&caFbb)9l4w765DT-gB47r`Pm*&n^pk z)~tJDmd7{O)JbP-1SM-mq$r#5=WE)e$VLHc&72h7IR18R7YUmrxU*(TI+ry5dQFdX zc22-svm(XJjelIzP@V}ARIV8-qk_gq*L0PU{esRlOJ#Kb`10CrCAL7&xQ0-kD;S?! zGfi*~1Z?>CkEN~pVVhcQE6lRh3 zWdAdZ^zTa3=YgG0_m1OWfP)iw9Df@yae8n(EfP57^zeAP0NCSne*kv?@NjAyz&`@^ zIean^wI0!YL+0EmSL#I!`<$ihQn`Yd3ua9@E71iTj7 z6!<>CW`U(h%LjrMIu+?VK5f!I@V(Pdg?0Okvyh**lpYv!diGNtw0+p%$&1vJ9#4m_ zxVJwU%)HqCl%H!k`i!SdKm#wFxW@^Rz_1g~m2d(WWO6SM;sHLBXHEzM2JqZCLNqXf z=lK#MfT0C0k&pelCmk=U4EIWg-3p z5!|z~DF2ZN-a*-?{zH83SF+FiNBKNm8M@~MihEKP*)xpdxyw%U40doY%Hn(Y9Xv}} zSkFKfH$fKNGm^#gmqqjp$+>y5#GX+(Z@27d-V1APxD1;&Y|V3)9nTwV;HJys^7sur zge){~Adwp*JC`?-$U7oCl{YlYy)HYSH#*BRkYP4o1aeQy&Tb9|^1NgxHwXK;mt_f? z{63zIEPQhypPM9$*&NB|1<6it4sCG@WJ#N&niIj6IO?SvdQVfl--JyZvrM~x>1O%U zFSkzyO27HB!e_BY9O3hhG0yjSfA@D(L)kJ6Dn#8nDZ34|plVObBOwN=>g19DdV;Dy zxqJY+iJ}k59zpFWU`T!ix{JCqwDcSF9jb9?SqGwYeB&;oLCqaC?(!4R?GA?f(jfGx z*HXvg{U{)O<$3wMQ)xgpM)Rol$EW@FqtXm1PYUn{$ZGt=+s>-TL zSb7CL&8kmW_J+!{=+iPT^e_vUmS2PFv+hhUeSmti8mE`_A*%cve^~`|UtZ%ckAUcM zhW`>DdMv-|zl?&)@hsB)`Vl^c8cGQ{!a%Y8=VCapvucFA3E+{dSc|3T&K1tn4D$B~2h!LrMFS-rQMn zB)KL{-C0c|`6kWAteBDce!ZGa3P_qevZ6<_pPM+cdWv*# z?hS9nljJ@(#aq2Z@}HZ%zG6jko}0YBdY*J-Zsy}k0Lg1^`r|5z6f`$yu%e-~4Vo}m z#VGd$y{TNWSGojERjz)n^a+|hy<)6%2%0>-dRBQTXl8Wfpwc5~dUW+GPh8+{x_W~?n<}*sm|4lO27Wu%PW>jr~b*ys|iZ){+Z>nryTF$viZ5(L_9RB-hy8)Co-4N1bd0|s{y%=^7Eu5I literal 0 HcmV?d00001 diff --git a/xrspatial/geotiff/tests/golden_corpus/fixtures/overview_external_ovr_uint16.tif.ovr b/xrspatial/geotiff/tests/golden_corpus/fixtures/overview_external_ovr_uint16.tif.ovr new file mode 100644 index 0000000000000000000000000000000000000000..5a8ea38ff79a068c2b4a7524d78e4c8615bbc0fc GIT binary patch literal 2540 zcmchYeN<9u9>-D3*S17_O;Hfdam-Cq$4J5PvTQ2b(QP^l zdMrRtk#UvG0`;ySiK)k39A}Q8y@0QvyE`(q3ub#p(sHqN&T;n4{ix-p{|Xha9!_4cdl>O_-RGmV6jbDJwo% zTvm)t#FbHKh%?1B3MM!U$WF#*9>yKX2u7SQrqNH^=AweJM}TD8n`4RlHkM)&_x+^* zza_D;;~$gQ;Qv<=r)=^5n8a^wyZyN&zIp%4{^vKGStSsH942Unw*!4PX z-|O_C*8r#Zx}W#);u~u zvk@C;Kn5D48w}wMMs%Sew9ptgYlxgRVz34j))=EQM5v4j6^5`1WBfbA*}nuqQ+%Om z(9{4s4-}&YfvX9o)*wkWC{`8%$O@)pA&FTi<~RaB9!wiYl085(2kXa1`f;Lp>~J0j zO=5?VIB|M*q@II8vr%YHjDj7Z;3TlvVJuF(m2DFcQvAG*5mMpXcZ(VPCLF(+$`_IN zVpd)gkk?Gf6A|;o%*iJFWHW72MBWXWwOF_Y3D-soHQ_=nnyd*WYvX1#kuzEhMuWm= zWBN1^ecFU8ny@R{__rFHAbQXjb~t({z`?teapwf?P6_o+G3ibzE9V4|Q$oopCgzkf zpPj%zE1^9rCVPWs8P;El^p{1~OT+7B=zM8tzASD^8aXAyBuG&SvKXZ_LMclqmxh(g z;#Z|M0b|h5r+_g8cM4o!G#GFV^V9|-sbPUtXaEZ5DTPL2;R18kfS;YG%^Jyppm_-! zWI+ZkMK@Z)8<)^Ume8W5xH(JY+!7|qf=XJ7>9<7mFC|>Hgk4>VU$@u~L z0);U+?l_JIl>;SlU@U?=K=7atphN?ec2n#j4he)jca!HUTb}=b42=3Ag#xGI(qpuZGIwka)E$ z5*r}ZP)Hmisg|i{ODdNm;SjT4CyWw`ddim2jcyex(9CSPY{! z3RkygSMZc9ZYzt|W98Zeq!ON24yjbQ-M@=bFU8e&QtM@;`Yu+!6v*$S z~Jx9IEAl*IB%B2Ij=P0=hV(xk7b2|R{Iofju z*#|T$RQ__2zd}|om((j1`SO;01vn+|m{Rm5$i)eYE~UI(sZf^7Tgw$at8$xwHP7=Y zWGx7tf|ePL7F^>iYU2{AahX+Q0g7Hxik65)%gi|oe(n`*ZiyTOnysp!6;Y5?*0>^R zv?_{LT8gaT+)BrswJ&K!oMi3lUuo~RDzC1zUbXhDuh;~3c8>O*zI2HDjRPSFm+((g z9Xr`K5of*ZpFLeBD6c zPI@IwV6@Mb55NO0l|GM*4s+@x*AOYej(A@u)?d8&Y{eM&D~B)K zlRrn`LTXb!q<&SKyoYqMHuaa*058uE#?xXVyT{WL;AP`LJm&TI*wsD0Q75Y@Imaot znIGv+-lAR5Q5x3^=ms@x?2+YbfAeL_h|k=bWi)C1raK}tgabf{vD#tx?7gyKXzt~A zhmgt5hxd;Lx1a7DMYa~48$~@_Tgy$kmQr^t2OvH@mYa$PpX6ka|7m@A)A9B})BRfy zdQA_$Xt-qR_`T}Qa?myiB1E0plyYOI!=Al5x?%9UeyHOP{>jepKk!fW`kmra{)qk? zf9z_+-s9aPr}9sO-(E;~**$uu{3Y19e&hgsv1asv{Cob3Gjh|Nsei~{VAlx@0(TCImzqG|YpHhA;>?v?zfFI)pJO2mvI78B92U z7*xms8fdjuW=yb^K_DSC0>%I;N~8lf-PQkk`@MDFs{7&AJ+*i3XP>k7dFrh5Vg0I> ztE;Jql!%B3L_|bXTtrl;P70^!zqHstT0wYU{2%?-35kF7UuPhqyZ%0#B;2U{>pt`! zeNq^s`cDi=Q$%FnzhdnFr9J%6@ALJgrD)lIJ(vA^6%omayI!i*aP6=6ouqjA zo$__@)U`hHlSpF4UJ^cC%}-%h_3ep{xat2I#P*x34&jvUW|5yoME6M5{46TB>&{8h zy?bcCiptO9Hg!c$ODW=h)0I9gQ-oX4-F;fx1NT~Y?`c^k?u)K?ztkbzu&!*s%tIVk zSF&IF8t$F0e821`+@Ccf`=w-XPiv(1%Mft$HM{mppTRL}_UxA(#C@s}Ym_p@4c5ps z$~56tY9tz^V{lV7a*eVpxbHQhQBoSX7d6sRGUd3%n%z;-0k}6cd!uBha9?Z0m!xcQ zqcyTiGF>=cjpUMa5^lCeeo6K_ZaY*&O-cgyBveXGCJV<7-K8e&jAMlEQImavTMrd0 zlQO~$gvyl3P;kqk5@pgD++?U+nd~BtA1dl61;srNmG+Y<#r+Yw+fUjD_j~AGKiN09 zjZpCksbjd2P}vEY4&293$qDHjxS3G-3E8hW0ZYVMN)GpoC1ov>kK?d*SxdX)##wu; zWk+$JSz@4+C2oi%1Io1GKCmP}>3G~UOAeIf;kHrNiY z5pC0z6OqzOS5rK#hu3H=VH#yn z3Vu0Eq6~sbCxt=FR519-Fu5|N#q`cF?K0IxJU>j-PXU^q83yrFhT@-xN&6|5rr!%w z_k)(={|MXd2k}WS2~+V?@xlKdw%1STO?rEnmY?by{6?7ggu=1(+hIx*%E#~{VX_m7 z9qISOG$)`P_>W z@h{oZiHcR}RJM8|vqZ#Wxik(hJy1 zo5~LOF}CccVsH8bw&o_Z7r)Au+=Qg2SF=?&RZ{VDZ23*4?espj&Zg=%Ui5&1#FJ1p zF+`Z!o@~egMdv3`YT_5e-tD}Uhrrb!P7uUCX@f$zhB~?sq5T}OjuI2aBv4IkQt#A~6 z8>RGG`6yxpCHq>j9ey9B`5M}e_=u8x4M~7kqEuh2Bp_x`@~@RP;5{gv*Qy%`0Yl`7 z!d^I@p>RZbFX9

WJcPcs)by2=q3B!`O8M;sz%&l#i&mA;uYdjwp@5A2IeFQ5`{i zW{5pjuz=?=6dx;FAchz+j}`C3n;9C9q4yCV7!r>mad0vN`dB3nG0l*Btn?Ay&Cq_V z`Vq0k5WTLT1vF5Z$k^yEarg9gb3yae?etMAk+r3bI(xX7+9OQXAwEd)`sqxo}44r7Vg>OoITe1qZx-e=GJDTS&umTtc^!A zf8?C7wi?a;%n<{1Ei+^}FmS(RRvt$YG_cG(%Q*y^TV@Y&WI$MJh8f2cG;Pgl=4gOM zt(liNwxDHe_6Lpxs2881$uR;=;mzj4GjbsaL~Ir^LX9kL2IN}C1_nddl%H_aWg$2hW^u-*(S zj@hPZZ`K2j=B80^=4FoEre$yTDo1itFEvAlW4vjSnpMqF-84+i4B;H#v`EdKc>5Dzt&o45=T zWg1|BlPS@(!4zioSS`DJCyJtV)tw}Wdg%S?&RIkV+CUxYOgxA- zS9f_qlt#lyPDVr%v?$f6s=aNoCPWYMq_ikM?ZEPUK#5CT^PtquOL6wezm**H!T%$b7Zy8ZdqRx~P zD3s`^fbx6_RXxhNJQqXpkMb$MjiK5^xtHfHQm|3M-BG6ka-bAs6e{2rlxi5| z8bBzeL`3-qu0eNpI2~j}-1#hT(qYzU$$0%-5o>RAu zQ7xj*P7yjNaZ!O&`5jcPD3__+8KU-sP<9krt-d0Qldhp3cgasmrmN|$WahW z=WTDvQT3Nj+Y<6A7nl5O^Yf`nOHQ`A?i9}@FWcMh)Wb_Z*yfE=eq6d>TQEwMU4nPz zSW?a|d34>fq?#?A=_0gJE-eLg<+oBbmz=wD<0*klK3%utsdh{5U3ol8@=|bD0go!V zbSf!Fo8q#BO1h;@HC}Q}B2-Z#m;96RtEj3=$fVq0itmzl((PdC@g=vUyjjZ4rJ$sO zS*rXJ;(Lw*<=m3z_gfBBtEIEw3B8odOM&0>d#O50F5h!gDIrU~-*2Z<9hc61&)cS? zEron9ka%@c!wDZPwfA1wkqYNmr!|oE(R*&+ySj5XwO`@ctpkHx7~1#Q zTsv<}k(OZ&IXhfLYcN+oJL*9zHdi@2!lXSm*E&0Ph<3|dX=wN%t;Jk(X!IJb!d!J| z3>(r`JA(xQH8G=NsxqH<|uiq_Ghb!p6&mfxcEVYrLd+M@Yk zG>KN#qWWRvJFT}x=fju;4Hv7RIh;kSjaAbeb*2@@Dr=6sptZ&B(;PFR<;E(Khbc55 zR)ai>p^;*tMXRJ1P3`)z4g#-?l((c8o2afvCN@7(4N8ZrdW3>Xuj?r$%Doqb} z(C)`-PLJN8RmQ4Lk9?)|#Oh3s$_sT~h{Lc7|SC5?qul^L^Lv4^TEs4Nq#X;nFu9gVfEs)}T(V~wgv zk!*kLv8u}VEE}w475P0Ii*=}~R%PwR>IIjnvXR(>!4-E|hFFu}^1JK^tW9v0FG~e$ z7)CN2OKI2So-|^4&?>xi+pQ` zs&gzQzJUYj9NUwB#G!JGb(nANKptcN$UoswZN-x1!+OiC*k}2NdMh5V%=o6gj&IpZUS%iq9eS&ESdx6b)G{5m3;$qhMK#NqZ<1PG z&5q>Tq*jHnRQZOfq!6|*|7dFE9P2pWB9%PHzR9;wt#)L|^YypO9NFjihqo*GSXO+q z?eaeMWxm~ZRT@i&Z@f)PV~6mMZ&&WH9Qjt;grpxLzoQKQmw_JIemk8@Ut*c0Ig0-E76%o*|&eM7x1TR?o zX?-T)I;^K(_YmO+*rR^fLqaU9yI=1b;R1}_um6eg6RdB)t_%TQN81l05W?%g{d#8z z9(9lR>kksH)&0__Yf3m%_plMxM2M;DYSfD%1k`mj>aP%z>UyJeH3-giZBejt0;aAr zN-u!mQ`a7)KSj7v*R!N+OK`7yv;^xS#MgB%=_L_@>*!1R-wCO8eQLTAgj3-(HCPrQ zEF4tRb0(m|AFJuVAY2XqrA*g|;2Qq03`QYDhj*3fVF>==9cB891Z;S(pDvVu3~%#; zl@cPtJN@*02;SlCe)?|+3E@2xy2l7^;g2R@9fY{>?g_mcgrIQxg#K4TN_d~Ot{ed| zPqT*Q6E4n!LT)2?&Oa6+8sW$JUxYM9I6MDP2wsFs^Iby5A_UHN2yu##Jl`uMCxXj- zn-G!+k@KBG9wPY8w+qpSaC5##NI8Ua^N)l;L%2NOEo2u$$UI$$DTFlG$z23ph&kM; zQA&vogF6xSLoC9bT5`5@zMaf9D0T7}Ft`gp12mL62P`+-T}S^!I%!m#iw+l-^pXMe zH4>as44`9#mAzycokTi?DJG#Y!UA71iM~NXEEad7eaUn5 zReyMC@jZ03uQoKct`Pl^d%t?lzc=dPoBC_T#1epf;kY8PiO zgbDGYOwOARBCbcLZlExO6C@B@)6s`eG8ZQF54wL=%4o# zpE~51)qh^y=vhR@tzy)nfEWFK>fao>co~zHoH~L@Yevx-ll!+G4w^ie2R2P{XBriO z-%NAQG!+30rkQ5|58$=w?K90x;EO4Ku<;NuY??RN^bp{hW)A|_fOn<^gUz3SKbw9w zZIl6?HszQ$5rFxo3{&6?z-+o@+B^t+YD#ZvGzA8m2u)2*z)DkA6A%MTHRU%ouK?eh za50SB8bn5J@Iu_-eK2mszR-HvIV0=_okR~l`B(WbnWrY?Zjl)VBZ0kcg7E6v}5 z?UHt2*WKRJ%fSH(r zsphYMVC84qMmgZwN{($)KEPSYum#+K@s(S)&7;8QmGrJgOJHb)(ACrmd|1iq0^)(` zmHe(|9e;M)rRd!qv|wvzX~sTWvX$^H(c z0&^<`-{|do6pE>8Ip&bo_#>MLRPC+ z_~rO|f>`BwV`grGVx^by_&|b8r3aM>BxqFnP{)@OBq~vnOi}{0(mQf|GC{7=^F6aO zLA%oT{Ww2CH0Zo4Gcy4acIP-Rb(u~*f@sR}C84o)1euCzV4}JV&g5(V9CbKd@b;kSV z_)LQQjORCIPlC>j@3(QmhKT)nc_w~C!QM-L{Mm+-y+;AFenZXPr(m43vCAHHj!E25 zw)Z|aKEAQX-gAukXk(wf@7Va~4Y8i{R?NH&#U3xK@u3Zw9*+mi<_(P=p9kX~HY9pb zmzm@ZXpi^h@#zh@9?w-~_l9ND5GC%<2tQfp^;Y+=jfsbBEctp(F6!85gxZFS#75Rfk9@lY0xX^>A5^(yG6h z@NTC@M*~gM%hJGr5NC~u#|K|%ME;T+IZxhYRO^aXBpVn3u9zaSve7-)2oLfRqx-Ir zO!6M1`T_JIvbj<70OlcipAmH+;u`sc(Sw1=Ph>GljR{(Y45KufU$PYNOE_WFra?jVUKXDfgly0?5ZG z_oE}H$a0kWWwb5XlG414=^|@WsLK&aWCzNF<;d@3QA~|GT7s;HX;jB#ks+7{^$2J3 zK}?H!$%q?dd(4B$$ggDa#Tpy599e&{(FT)GR$6SZ ziEt+$UTm?693{&x)^(yS$!3d9otRd#<|3stBA#rw*xDJ%BTFvUV$s@U<3#|AsUoW` z-or)&laDXn$41VQAbi)`L83L^p{& zLms4bM8^h@M=5mmSZDG8ro%thhdhFz+r+w)hZZ}qvBBiAMf&d8Q-S@^4rDAUa2QHA zjCBnhEbWMh^$#2^rK`ju0|$IMykorsM||i&5ftk?J#vF?AA4?k=xawxY{>N3SGqX&q+P#U z2ZDRvZdi`4&pmB7nBQ@c>t{EbPgmkP*$udNcyhh$M%?L#xj)zqjduLVy7vhb`%5+%w&StsR%R0o|jmbWN^v_dt9{AlIjRB%W@^b?+YHbtH3xyT^ER zN$#oSe(eqyE-HCgn{Lc?O&+Z3h~)Ywk5z88jgQOO+a7l8o%6(L&4~KATesV&wY2%; z3fR?J)x4?!X0;Yx;+BJyR?;P209e{ua_Qp~*wI>fY1J0YZzX=!Or+9@+uU}j4ur2mV)=J-#w<^%!_No;c0z0Pn|>r+GKP z%J{p}AHRY<@zv9-av+{}$Bvs1*7M5jcgsJNlYMirL&0eLXJ4Zf^GGnpTSO z+~m#mpp@ftGvC(iQ>^BuzpbaFIL^(;FwI(Zo^-WlkN(~?0;)^r5MJCkYaA%f#OGdpXJ0;`?ro%J+3!8ny4&UGNKYdZ%zxMz1C;!s_H+|86_d@@to+tdD zcb*i!6Y($Y^iRLkSVu$z`Hwy;Y=Zu~NBVPvn8-Qdxlv)iQ(Id^p4*+P=6UvEEnNB5j@_6@FSbbI&!Y^Lw77#0*g~_fQfMD7imSqADoq z64Y*Q)ZX7w;zv=k?I_6vl$ZT$7x5!b`bV6Vjvx;oaglxOWcJuu^D)xyv5Vw&C*$kR zs@IXnue->9akBd2tn&p~7?!RM^$WvmN$h)ewc=OTil@;PPy8#MVK4lOyzn&Q!V~Wc z&l2AK>h|tw+`A`1``TC84%#d|ZT7(`mPr-cCYWUy%sx8HvY2JtJFxT}*oS*rX1#2? zRF-in`}j8N@`BEPI$@%U`d5o|U5jARMSA{4`dANLqz5d*L(khoKY^+1#)QQ&^@0@J zR|_4~@93!)9wgl{Ar;#A-!b$rJUV&DVzSWQ=8nEi;o;6ZW}St0*gMA9!sGlqLIV_B zUBwH9(Auq0f3>{EwY)C6yw<CDV+P+m3J1ga%iis;geO`biV77NKCt;T5Haj0T%MQ|RUtM?5S<};5KUlqHQoU{yvSt{vespfl zVs72uaZTTG{czu!S>L){+M03N`thALp@B54OaU&fUm*ffx>~5cp$0JFcXnXi}j+iHP;TgY++o@GtcCCnikZ zahoiPn=A}cZ(kkg-Pd2cZ=kKBAE+4UywHE|!a)1G{`>C+dQSA$pBQ-bOMmk(1KmIM zQ-2zu|Jg4zFsUn3JY!N{A*SMawdA~OiAQt^%D==D>wO;S?GfRP^7i&j`2D=w?;df# zqk>f0SAkxQ###-at-KK^2RZ{9?*#zuQ;qkhfF9e%dRyR8S7UP*(4EvsO#M5Lf;2>fI}@cdtd?z3PAWN38D^ zr0=x|->crfKPJ4r;`a7h+}o=`s_m-$qEigJv$)7IUqfL zK=w0DYKSKDfhIjolij)^_410$`zz9KugHE|lNwu-SzVK!>)#ps@7K;Z$)}IY75*wE zCSpj4vdH;Id6%Tpb^jirJ0HGZ+GHP%6wg*T^|P+40TFH*-9S8LsCTt6ypCm$gC7)t}Y| zROkL&8*<#qRM0(dP>6(hA7MOK@HB6G4)OhI`>funx*rY=!J1Y`vtw zi5rfs0%O}pZrE`j55*^Q+pBln zt*m9h9;pzYkVDKM$W None: f"got {entry['external_overview']!r}" ) + if "cog" in entry and not isinstance(entry["cog"], bool): + raise ManifestError( + f"{fid}: cog must be a bool, got {entry['cog']!r}" + ) + if entry.get("cog") and entry.get("external_overview"): + raise ManifestError( + f"{fid}: cog=true is incompatible with external_overview=true; " + f"the COG spec requires internal overviews" + ) + def validate(manifest: dict[str, Any]) -> list[dict[str, Any]]: """Validate the parsed manifest and return resolved fixture entries. @@ -403,16 +413,81 @@ def _rasterio_kwargs(entry: dict[str, Any]) -> dict[str, Any]: return kwargs +def _write_cog_fixture( + entry: dict[str, Any], out_path: pathlib.Path, pixels: np.ndarray +) -> None: + """Materialise a COG fixture by staging a plain GTiff then copying + through GDAL's ``COG`` driver. + + The COG driver enforces tiling and IFD ordering per + https://www.cogeo.org/spec/. Going through ``rasterio.shutil.copy`` + is the supported way to invoke it from rasterio. + """ + import tempfile + + import rasterio + from rasterio.shutil import copy as rio_copy + + base_kwargs = _rasterio_kwargs(entry) + # The COG driver owns these settings; passing them through the source + # GTiff is fine, but we strip the codec/predictor from the staging + # write so the source is cheap and let the COG copy apply compression. + staging_kwargs = dict(base_kwargs) + for k in ("compress", "predictor", "zlevel", "zstd_level", "jpeg_quality"): + staging_kwargs.pop(k, None) + staging_kwargs["tiled"] = True + staging_kwargs.setdefault("blockxsize", entry.get("tile_size", 16)) + staging_kwargs.setdefault("blockysize", entry.get("tile_size", 16)) + + with tempfile.TemporaryDirectory() as td: + staging = pathlib.Path(td) / f"{entry['id']}.staging.tif" + with rasterio.open(str(staging), "w", **staging_kwargs) as dst: + for b in range(entry["bands"]): + dst.write(pixels[b], b + 1) + gdal_md = entry.get("gdal_metadata") or {} + for domain, items in sorted(gdal_md.items()): + dst.update_tags( + ns=domain, + **{str(k): str(v) for k, v in sorted(items.items())}, + ) + extra_tags = entry.get("extra_tags") or {} + if extra_tags: + dst.update_tags( + **{str(k): str(v) for k, v in sorted(extra_tags.items())} + ) + + cog_kwargs: dict[str, Any] = { + "driver": "COG", + "blocksize": entry.get("tile_size", 16), + "overview_resampling": entry.get("overview_resampling", "nearest"), + } + if entry["compression"] != "none": + cog_kwargs["compress"] = entry["compression"] + level = entry.get("compression_level") + if isinstance(level, int) and level >= 0: + if entry["compression"] == "deflate": + cog_kwargs["level"] = level + rio_copy(str(staging), str(out_path), **cog_kwargs) + + def write_fixture(entry: dict[str, Any], output_dir: pathlib.Path) -> pathlib.Path: - """Materialise one fixture. Returns the written path. + """Materialise one fixture. Returns the written `.tif` path. Real writes only run when called by the non-dry-run code path. + Fixtures with ``external_overview: true`` also emit a sidecar + ``.tif.ovr`` next to the returned path. """ import rasterio output_dir.mkdir(parents=True, exist_ok=True) out_path = output_dir / f"{entry['id']}.tif" pixels = _make_pixels(entry) + + if entry.get("cog"): + _write_cog_fixture(entry, out_path, pixels) + os.utime(out_path, (DETERMINISTIC_EPOCH, DETERMINISTIC_EPOCH)) + return out_path + kwargs = _rasterio_kwargs(entry) extra_tags = entry.get("extra_tags") or {} @@ -432,6 +507,24 @@ def write_fixture(entry: dict[str, Any], output_dir: pathlib.Path) -> pathlib.Pa ) dst.build_overviews(sorted(overviews), resamp) + # External overviews are built by re-opening the file in `r+` with + # the TIFF_USE_OVR=YES env hint so GDAL writes a `.ovr` sidecar + # instead of appending an internal overview IFD. The sidecar is + # committed alongside the .tif. + overviews = entry.get("overviews") or [] + if overviews and entry.get("external_overview"): + from rasterio.enums import Resampling + resamp = getattr( + Resampling, entry.get("overview_resampling", "nearest") + ) + with rasterio.Env(TIFF_USE_OVR="YES", COMPRESS_OVERVIEW="DEFLATE"): + with rasterio.open(str(out_path), "r+") as dst: + dst.build_overviews(sorted(overviews), resamp) + os.utime( + out_path.with_suffix(out_path.suffix + ".ovr"), + (DETERMINISTIC_EPOCH, DETERMINISTIC_EPOCH), + ) + # Normalise mtime so re-runs are byte-stable on filesystems that # encode timestamps in sidecar files. os.utime(out_path, (DETERMINISTIC_EPOCH, DETERMINISTIC_EPOCH)) diff --git a/xrspatial/geotiff/tests/golden_corpus/manifest.yaml b/xrspatial/geotiff/tests/golden_corpus/manifest.yaml index ad345ff09..2137003bd 100644 --- a/xrspatial/geotiff/tests/golden_corpus/manifest.yaml +++ b/xrspatial/geotiff/tests/golden_corpus/manifest.yaml @@ -51,7 +51,15 @@ # overview_resampling: str Optional. "nearest", "average", "cubic", etc. # Default "nearest". # external_overview: bool Optional. If true, write a sidecar `.ovr` file -# instead of internal overviews. +# instead of internal overviews. The sidecar +# file ships next to the `.tif` and must be +# committed alongside it. +# cog: bool Optional. If true, write with GDAL's COG +# driver (tiled, IFD-ordered, internal +# overviews per https://www.cogeo.org/spec/). +# Implies layout=tiled and overviews built by +# the driver. The `compression` / `predictor` +# entries are forwarded to the COG driver. # gdal_metadata: map Optional. Written into the GDAL_METADATA tag as # {: {: }}. Domain "" is the # default. Used to exercise XML escaping and the @@ -141,3 +149,54 @@ fixtures: atol: 0.0 rtol: 0.0 lossy: false + + # --------------------------------------------------------------------- + # Phase 2 PR 7: overview / COG variants. + # + # These three fixtures cover the overview-storage axis: + # * internal overviews living inside the .tif IFD chain + # * external overviews living in a sidecar .tif.ovr file + # * a Cloud-Optimised GeoTIFF layout (tiled + IFD-ordered overviews) + # --------------------------------------------------------------------- + - id: overview_internal_uint16 + description: >- + Stripped uint16 with internal overviews built at decimation [2, 4]. + Exercises the in-file overview IFD chain. + width: 64 + height: 64 + dtype: uint16 + compression: deflate + overviews: [2, 4] + overview_resampling: nearest + pixel_pattern: ramp + tags: [fast, overviews, internal-overview] + + - id: overview_external_ovr_uint16 + description: >- + Stripped uint16 whose overviews live in a sidecar `.ovr` file. + Exercises the external-overview lookup path. + width: 64 + height: 64 + dtype: uint16 + compression: deflate + overviews: [2, 4] + overview_resampling: nearest + external_overview: true + pixel_pattern: ramp + tags: [fast, overviews, external-overview] + + - id: cog_internal_overview_uint16 + description: >- + Cloud-Optimised GeoTIFF: tiled, internal overviews, IFD ordering + per https://www.cogeo.org/spec/. Written via GDAL's COG driver. + width: 64 + height: 64 + dtype: uint16 + layout: tiled + tile_size: 16 + compression: deflate + overviews: [2, 4] + overview_resampling: nearest + cog: true + pixel_pattern: ramp + tags: [fast, tiled, overviews, cog] diff --git a/xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py new file mode 100644 index 000000000..60bf2e864 --- /dev/null +++ b/xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py @@ -0,0 +1,260 @@ +"""Smoke tests for the Phase 2 PR 7 overview / COG fixtures (issue #1930). + +The three fixtures land here: + +* ``overview_internal_uint16`` -- internal-IFD overviews at [2, 4] +* ``overview_external_ovr_uint16`` -- sidecar `.ovr` overviews at [2, 4] +* ``cog_internal_overview_uint16`` -- COG layout (tiled + IFD-ordered) + +Each fixture is rebuilt by the deterministic generator and shipped in +``golden_corpus/fixtures``. These tests assert the shape of what is on +disk and run the Phase 1 oracle against the base (level-0) image. +Overview-level comparison is out of scope for the Phase 1 oracle (see +``_oracle.py``). +""" +from __future__ import annotations + +import pathlib + +import numpy as np +import pytest +import xarray as xr + +pytest.importorskip("yaml") +rasterio = pytest.importorskip("rasterio") + +from rasterio.transform import Affine # noqa: E402 + +from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 +from xrspatial.geotiff.tests.golden_corpus._oracle import ( # noqa: E402 + compare_to_oracle, +) + + +FIXTURES_DIR = ( + pathlib.Path(generate.__file__).resolve().parent / "fixtures" +) + +INTERNAL_ID = "overview_internal_uint16" +EXTERNAL_ID = "overview_external_ovr_uint16" +COG_ID = "cog_internal_overview_uint16" + + +def _fixture_path(fixture_id: str) -> pathlib.Path: + p = FIXTURES_DIR / f"{fixture_id}.tif" + if not p.exists(): + pytest.skip( + f"fixture {fixture_id} not generated; run " + f"`python -m xrspatial.geotiff.tests.golden_corpus.generate`" + ) + return p + + +def _manifest_entry(fixture_id: str) -> dict: + manifest = generate.load_manifest() + resolved = generate.validate(manifest) + for entry in resolved: + if entry["id"] == fixture_id: + return entry + raise AssertionError(f"fixture {fixture_id!r} missing from manifest") + + +def _candidate_from_rasterio(path: pathlib.Path) -> xr.DataArray: + """Build a candidate DataArray by reading ``path`` via rasterio. + + Phase 3 will swap this for real xrspatial backends; here we use the + rasterio read so the oracle has something concrete to compare. The + test asserts the level-0 image only. + """ + with rasterio.open(path) as src: + data = src.read(1) + t = src.transform + crs_epsg = src.crs.to_epsg() if src.crs else None + nodata = src.nodata + height, width = data.shape + pw = float(t.a) + ph = float(t.e) + ox = float(t.c) + oy = float(t.f) + x = ox + (np.arange(width) + 0.5) * pw + y = oy + (np.arange(height) + 0.5) * ph + attrs: dict = {"transform": (pw, 0.0, ox, 0.0, ph, oy)} + if crs_epsg is not None: + attrs["crs"] = crs_epsg + if nodata is not None: + attrs["nodata"] = nodata + return xr.DataArray(data, dims=("y", "x"), coords={"y": y, "x": x}, + attrs=attrs) + + +# --------------------------------------------------------------------------- +# Internal overviews +# --------------------------------------------------------------------------- + +def test_internal_overview_fixture_has_overviews(): + """Internal overview fixture exposes [2, 4] on its first band.""" + path = _fixture_path(INTERNAL_ID) + with rasterio.open(path) as src: + assert src.overviews(1) == [2, 4] + assert src.count == 1 + assert src.dtypes[0] == "uint16" + assert (path.with_suffix(path.suffix + ".ovr")).exists() is False + + +def test_internal_overview_fixture_matches_oracle(): + """Level-0 image agrees with the rasterio reference read.""" + path = _fixture_path(INTERNAL_ID) + cand = _candidate_from_rasterio(path) + compare_to_oracle(path, cand) + + +def test_internal_overview_fixture_size_budget(): + path = _fixture_path(INTERNAL_ID) + assert path.stat().st_size < 12 * 1024, ( + f"{path.name} exceeds the 12 KB per-fixture budget " + f"({path.stat().st_size} bytes)" + ) + + +# --------------------------------------------------------------------------- +# External `.ovr` sidecar +# --------------------------------------------------------------------------- + +def test_external_overview_sidecar_present(): + """External overview fixture ships with a `.tif.ovr` sidecar.""" + path = _fixture_path(EXTERNAL_ID) + ovr = path.with_suffix(path.suffix + ".ovr") + assert ovr.exists(), f"expected sidecar at {ovr}" + + +def test_external_overview_fixture_reports_overviews(): + """rasterio surfaces the sidecar overviews on the main file.""" + path = _fixture_path(EXTERNAL_ID) + with rasterio.open(path) as src: + assert src.overviews(1) == [2, 4] + + +def test_external_overview_fixture_matches_oracle(): + path = _fixture_path(EXTERNAL_ID) + cand = _candidate_from_rasterio(path) + compare_to_oracle(path, cand) + + +def test_external_overview_fixture_size_budget(): + path = _fixture_path(EXTERNAL_ID) + ovr = path.with_suffix(path.suffix + ".ovr") + for p in (path, ovr): + assert p.stat().st_size < 12 * 1024, ( + f"{p.name} exceeds the 12 KB per-fixture budget " + f"({p.stat().st_size} bytes)" + ) + + +# --------------------------------------------------------------------------- +# COG +# --------------------------------------------------------------------------- + +def test_cog_fixture_is_tiled_with_overviews(): + """COG fixture is tiled and carries internal overviews.""" + path = _fixture_path(COG_ID) + with rasterio.open(path) as src: + # Tiled is detectable from block_shapes: a tiled raster has square + # blocks that are not the full image width. We avoid the + # deprecated ``src.is_tiled`` property here. + block = src.block_shapes[0] + assert block[0] == block[1] and block[0] < src.width, ( + f"COG fixture must be tiled with square blocks, got {block}" + ) + assert src.overviews(1) == [2, 4] + # No external sidecar should accompany the COG. + assert (path.with_suffix(path.suffix + ".ovr")).exists() is False + + +def test_cog_fixture_ifd_order_base_first(): + """The full-resolution IFD is the first IFD in the file (COG spec). + + GDAL exposes this through the ``IFD_OFFSET`` tag on each subdataset + (the base IFD has the smallest offset, with overview IFDs at larger + offsets). We probe with the public rasterio API by reading subdatasets + and confirming the base image is reachable as the default open. + """ + path = _fixture_path(COG_ID) + with rasterio.open(path) as src: + # rasterio reports the base image dimensions on open, not an overview. + assert src.width == 64 and src.height == 64 + # The COG driver writes the GDAL layout sentinel into the file. + tags = src.tags(ns="IMAGE_STRUCTURE") + # ``LAYOUT=COG`` is the rasterio-surfaced marker for a COG-layout file. + assert tags.get("LAYOUT") == "COG", ( + f"expected IMAGE_STRUCTURE LAYOUT=COG, got tags={tags!r}" + ) + + +def test_cog_fixture_matches_oracle(): + path = _fixture_path(COG_ID) + cand = _candidate_from_rasterio(path) + compare_to_oracle(path, cand) + + +def test_cog_fixture_size_budget(): + path = _fixture_path(COG_ID) + assert path.stat().st_size < 12 * 1024, ( + f"{path.name} exceeds the 12 KB per-fixture budget " + f"({path.stat().st_size} bytes)" + ) + + +# --------------------------------------------------------------------------- +# Manifest schema coverage +# --------------------------------------------------------------------------- + +def test_manifest_carries_three_overview_fixtures(): + """All three fixture ids are declared in the manifest and validate.""" + manifest = generate.load_manifest() + resolved = generate.validate(manifest) + ids = {e["id"] for e in resolved} + assert {INTERNAL_ID, EXTERNAL_ID, COG_ID}.issubset(ids) + + internal = _manifest_entry(INTERNAL_ID) + external = _manifest_entry(EXTERNAL_ID) + cog = _manifest_entry(COG_ID) + + assert internal["overviews"] == [2, 4] + assert internal.get("external_overview", False) is False + assert internal.get("cog", False) is False + + assert external["overviews"] == [2, 4] + assert external["external_overview"] is True + assert external.get("cog", False) is False + + assert cog["overviews"] == [2, 4] + assert cog["cog"] is True + assert cog["layout"] == "tiled" + + +def test_validator_rejects_non_bool_cog_flag(): + """``cog`` must be a bool; non-bool entries raise ManifestError.""" + manifest = generate.load_manifest() + defaults = manifest.get("defaults") or {} + entry = dict(defaults) + entry.update(manifest["fixtures"][0]) + entry["cog"] = "yes" + bad = {"version": 1, "defaults": {}, "fixtures": [entry]} + with pytest.raises(generate.ManifestError, match="cog must be a bool"): + generate.validate(bad) + + +def test_validator_rejects_cog_with_external_overview(): + """COG layout forbids external overviews (cogeo.org/spec).""" + manifest = generate.load_manifest() + defaults = manifest.get("defaults") or {} + entry = dict(defaults) + entry.update(manifest["fixtures"][0]) + entry["cog"] = True + entry["external_overview"] = True + bad = {"version": 1, "defaults": {}, "fixtures": [entry]} + with pytest.raises( + generate.ManifestError, + match="cog=true is incompatible with external_overview", + ): + generate.validate(bad) From 1eb2a6b383d7ead38554fab41bff2f96778f1220 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 16 May 2026 06:05:15 -0700 Subject: [PATCH 2/2] geotiff: golden corpus review fixes (#1930) Self-review on #1997: * Rename `test_cog_fixture_ifd_order_base_first` to `test_cog_fixture_carries_cog_layout_marker`. The body never inspected IFD offsets; it relied on GDAL's `LAYOUT=COG` marker, so the new name matches the assertion. * Replace `Path.exists() is False` with `not Path.exists()` on the two sidecar-absence checks. `is False` is brittle if `exists()` ever returns a truthy non-bool. * Note the oracle gap in the test module docstring: the Phase 1 oracle only reads the base IFD, so overview-level parity is out of scope here. --- .../test_golden_corpus_overview_cog_1930.py | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py index 60bf2e864..6490aedca 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_overview_cog_1930.py @@ -9,8 +9,14 @@ Each fixture is rebuilt by the deterministic generator and shipped in ``golden_corpus/fixtures``. These tests assert the shape of what is on disk and run the Phase 1 oracle against the base (level-0) image. -Overview-level comparison is out of scope for the Phase 1 oracle (see -``_oracle.py``). + +Oracle gap (intentional, tracked separately): the Phase 1 oracle in +``_oracle.compare_to_oracle`` reads only the base IFD via +``rasterio.open(...).read()``. It does not inspect overview IFDs or the +sidecar `.ovr`. A future PR (post Phase 1 PR 2) will add an +overview-aware comparison; until then, the smoke tests below pin the +on-disk shape and the base-image parity check is what runs through the +oracle. """ from __future__ import annotations @@ -98,7 +104,9 @@ def test_internal_overview_fixture_has_overviews(): assert src.overviews(1) == [2, 4] assert src.count == 1 assert src.dtypes[0] == "uint16" - assert (path.with_suffix(path.suffix + ".ovr")).exists() is False + assert not path.with_suffix(path.suffix + ".ovr").exists(), ( + "internal-overview fixture must not ship a sidecar" + ) def test_internal_overview_fixture_matches_oracle(): @@ -166,25 +174,26 @@ def test_cog_fixture_is_tiled_with_overviews(): f"COG fixture must be tiled with square blocks, got {block}" ) assert src.overviews(1) == [2, 4] - # No external sidecar should accompany the COG. - assert (path.with_suffix(path.suffix + ".ovr")).exists() is False + # No external sidecar should accompany the COG. + assert not path.with_suffix(path.suffix + ".ovr").exists(), ( + "COG fixture must not ship an external .ovr sidecar" + ) -def test_cog_fixture_ifd_order_base_first(): - """The full-resolution IFD is the first IFD in the file (COG spec). +def test_cog_fixture_carries_cog_layout_marker(): + """The COG driver writes a ``LAYOUT=COG`` marker into IMAGE_STRUCTURE. - GDAL exposes this through the ``IFD_OFFSET`` tag on each subdataset - (the base IFD has the smallest offset, with overview IFDs at larger - offsets). We probe with the public rasterio API by reading subdatasets - and confirming the base image is reachable as the default open. + The COG spec mandates IFD ordering (base image before overviews) and a + leading ghost-IFD layout block. Rather than re-parse the TIFF header, + we trust GDAL's own marker, which is the public artefact rasterio + exposes. Phase 3 backends do the equivalent check before claiming a + file is COG-shaped. """ path = _fixture_path(COG_ID) with rasterio.open(path) as src: # rasterio reports the base image dimensions on open, not an overview. assert src.width == 64 and src.height == 64 - # The COG driver writes the GDAL layout sentinel into the file. tags = src.tags(ns="IMAGE_STRUCTURE") - # ``LAYOUT=COG`` is the rasterio-surfaced marker for a COG-layout file. assert tags.get("LAYOUT") == "COG", ( f"expected IMAGE_STRUCTURE LAYOUT=COG, got tags={tags!r}" )