From b4e3ff4a5dcf6b363a9a23430f58da96dbe2edcf Mon Sep 17 00:00:00 2001 From: NoNameHeath <110743588+NoNameHeath@users.noreply.github.com> Date: Wed, 6 May 2026 21:21:56 -0400 Subject: [PATCH 1/4] Declare 'value_parts' variable under both 'if' and 'else' to avoid 'else' referencing undeclared variable This can probably be more elegant, but I truly know very little about python so wanted to make as few changes as possible. --- scrapers/EmbeddedMetadata/embedded-metadata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapers/EmbeddedMetadata/embedded-metadata.py b/scrapers/EmbeddedMetadata/embedded-metadata.py index 5d8a211e3..83935c2c2 100755 --- a/scrapers/EmbeddedMetadata/embedded-metadata.py +++ b/scrapers/EmbeddedMetadata/embedded-metadata.py @@ -225,6 +225,7 @@ def process_image(image_path: str): for list_value in value: value_parts.extend(list_value.split(',')) else: + value_parts = [] value_parts.extend(value.split(',')) for value_part in value_parts: @@ -407,4 +408,4 @@ def get_imape_paths(image_id): sys.exit(1) print(json.dumps({})) -sys.exit(1) \ No newline at end of file +sys.exit(1) From df121939867d5aad9d476cbc6eda260ced4f2861 Mon Sep 17 00:00:00 2001 From: spaceyuck Date: Sat, 9 May 2026 00:47:28 +0200 Subject: [PATCH 2/4] [EmbeddedMetadata] make embedded metadata scraper testable, add some basic unit tests this required renaming the python file, to make it importable unit tests run both exiftool and pyexiv2 methods,and as such require both installed and working --- .../EmbeddedMetadata/EmbeddedMetadata.yml | 2 +- ...edded-metadata.py => embedded_metadata.py} | 82 ++++++++++++------ .../EXIF_IPTC_XMP_keywords_title.jpg | Bin 0 -> 9722 bytes .../test-data/EXIF_camera.jpg | Bin 0 -> 18072 bytes scrapers/EmbeddedMetadata/test_config.py | 3 + .../test_embedded_metadata.py | 67 ++++++++++++++ 6 files changed, 125 insertions(+), 29 deletions(-) rename scrapers/EmbeddedMetadata/{embedded-metadata.py => embedded_metadata.py} (89%) create mode 100755 scrapers/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg create mode 100755 scrapers/EmbeddedMetadata/test-data/EXIF_camera.jpg create mode 100644 scrapers/EmbeddedMetadata/test_config.py create mode 100644 scrapers/EmbeddedMetadata/test_embedded_metadata.py diff --git a/scrapers/EmbeddedMetadata/EmbeddedMetadata.yml b/scrapers/EmbeddedMetadata/EmbeddedMetadata.yml index 8fa1d171b..c5c82d364 100755 --- a/scrapers/EmbeddedMetadata/EmbeddedMetadata.yml +++ b/scrapers/EmbeddedMetadata/EmbeddedMetadata.yml @@ -4,5 +4,5 @@ imageByFragment: action: script script: - python3 - - embedded-metadata.py + - embedded_metadata.py - image-by-fragment \ No newline at end of file diff --git a/scrapers/EmbeddedMetadata/embedded-metadata.py b/scrapers/EmbeddedMetadata/embedded_metadata.py similarity index 89% rename from scrapers/EmbeddedMetadata/embedded-metadata.py rename to scrapers/EmbeddedMetadata/embedded_metadata.py index 83935c2c2..703f4ffa5 100755 --- a/scrapers/EmbeddedMetadata/embedded-metadata.py +++ b/scrapers/EmbeddedMetadata/embedded_metadata.py @@ -10,9 +10,14 @@ # try importing config try: - import config + import config except: - config = object() + # is unit test -> load test config + if 'unittest' in sys.modules: + import test_config + config = test_config + else: + config = object() skip_ensure_requirements = config.skip_ensure_requirements if hasattr(config, 'skip_ensure_requirements') else False @@ -20,15 +25,20 @@ ensure_requirements("pyexiv2", "pyexiftool") try: - import pyexiv2 -# might fail due to old GLIBC, fall back to exiftool + import pyexiv2 +# might fail due to old GLIBC except: - try: - import exiftool - except: - log.error("You need to install the pyexiv2 or exiftool module.") - log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install pyexiv2 exiftool") - sys.exit() + pass + +try: + import exiftool +except: + pass + +if "pyexiv2" not in locals() and "exiftool" not in locals(): + log.error("You need to install the pyexiv2 or exiftool module.") + log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install pyexiv2 exiftool") + sys.exit() details_date_fields = config.details_date_fields if hasattr(config, 'details_date_fields') else False details_title_fields = config.details_title_fields if hasattr(config, 'details_title_fields') else False @@ -40,10 +50,10 @@ details_upprocessed_fields_unignored = config.details_upprocessed_fields_unignored if hasattr(config, 'details_upprocessed_fields_unignored') else [] details_ignored_labels = { - 'ExifTag', 'Orientation', 'PhotometricInterpretation', 'ResolutionUnit', 'Contrast', 'CustomRendered', 'DigitalZoomRatio', 'ExposureBiasValue', 'ExposureMode', 'ExposureProgram', 'ExposureTime', 'ExposureCompensation', + 'ExifTag', 'Orientation', 'PhotometricInterpretation', 'ResolutionUnit', 'Contrast', 'CustomRendered', 'DigitalZoomRatio', 'ExposureBiasValue', 'ExposureMode', 'ExposureProgram', 'ExposureTime', 'ExposureCompensation', 'ColorSpace', 'ComponentsConfiguration', 'CompressedBitsPerPixel', 'ExifVersion', 'FlashpixVersion', 'YCbCrPositioning', 'JPEGInterchangeFormat', 'JPEGInterchangeFormatLength', 'BaselineExposureOffset', 'FNumber', 'FileSource', 'Flash', 'FocalLength', 'FocalLengthIn35mmFilm', 'GainControl', 'ISOSpeedRatings', 'LightSource', 'MaxApertureValue', 'MeteringMode', 'Saturation', 'SceneCaptureType', 'SensingMethod', 'BaselineExposure', - 'Sharpness', 'WhiteBalance', 'ShutterSpeedValue', 'ApertureValue', 'FocalLength', 'FocalLengthIn35mmFilm', 'FocalLengthIn35mmFormat', 'ExposureTime', 'ExposureProgram', 'ExposureBiasValue', 'MaxApertureValue', 'ShutterSpeedValue', + 'Sharpness', 'WhiteBalance', 'ShutterSpeedValue', 'ApertureValue', 'FocalLength', 'FocalLengthIn35mmFilm', 'FocalLengthIn35mmFormat', 'ExposureTime', 'ExposureProgram', 'ExposureBiasValue', 'MaxApertureValue', 'ShutterSpeedValue', 'ExposureIndex', 'FocalPlaneResolutionUnit', 'FocalPlaneXResolution', 'FocalPlaneYResolution', 'AFAreaMode', 'AFPoint', 'AFPointsInFocus', 'Focus', 'FlashSetting', 'DigitalZoom', 'ISOSelection', 'ISOSpeed', 'ToneComp', 'Quality', 'RetouchHistory', 'Saturation2', 'ScanIFD', 'SceneAssist', 'SceneMode', 'Sharpening', 'ShotInfo', 'ToneComp', 'PixelXDimension', 'PixelYDimension', 'XResolution', 'YResolution', 'ExifImageWidth', 'ExifImageHeight', 'ImageHeight', 'ImageWidth', 'RelatedImageHeight', 'RelatedImageWidth', 'ImageLength', 'ByteOrder', 'Offset', @@ -54,20 +64,36 @@ details_ignored_labels.update(details_upprocessed_fields_ignored) details_ignored_labels.difference_update(details_upprocessed_fields_unignored) +def _read_data_pyexiv2(image_path: str): + data: dict[str, str] = {} -def process_image(image_path: str): + with pyexiv2.Image(image_path) as img: + data.update(img.read_exif()) + data.update(img.read_iptc()) + data.update(img.read_xmp()) + + return data + +def _read_data_exiftool(image_path: str): data: dict[str, str] = {} - - try: - with pyexiv2.Image(image_path) as img: - data.update(img.read_exif()) - data.update(img.read_iptc()) - data.update(img.read_xmp()) - except: - with exiftool.ExifToolHelper() as et: - metadata = et.get_metadata(image_path) - log.debug(f"exiftool metadata {metadata}") - data.update(metadata[0]) + + with exiftool.ExifToolHelper() as et: + metadata = et.get_metadata(image_path) + log.debug(f"exiftool metadata {metadata}") + data.update(metadata[0]) + + return data + +def process_image(image_path: str): + if hasattr(config, 'FORCE_METHOD') and config.FORCE_METHOD == 'pyexiv2': + data = _read_data_pyexiv2(image_path) + elif hasattr(config, 'FORCE_METHOD') and config.FORCE_METHOD == 'exiftool': + data = _read_data_exiftool(image_path) + else: + try: + data = _read_data_pyexiv2(image_path) + except: + data = _read_data_exiftool(image_path) ret = {} @@ -113,7 +139,7 @@ def process_image(image_path: str): date_details = '' for field in ['EXIF:CreateDate', 'Iptc.Application2.DateCreated', 'IPTC:DateCreated', 'Exif.Image.DateTime', 'Exif.Photo.DateTime', 'EXIF:DateTime', 'Iptc.Application2.DigitizationDate', 'IPTC:DigitizationDate', 'Exif.Photo.DateTimeDigitized', 'EXIF:DateTimeDigitized', - 'Exif.Image.DateTimeOriginal', 'Exif.Photo.DateTimeOriginal', 'EXIF:DateTimeOriginal']: + 'Exif.Image.DateTimeOriginal', 'Exif.Photo.DateTimeOriginal', 'EXIF:DateTimeOriginal', 'EXIF:ModifyDate']: if field not in data: continue value = data[field] @@ -406,6 +432,6 @@ def get_imape_paths(image_id): case _: log.error(f"Not Implemented: Operation: {op}, arguments: {json.dumps(args)}") sys.exit(1) - -print(json.dumps({})) -sys.exit(1) + + print(json.dumps({})) + sys.exit(1) diff --git a/scrapers/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg b/scrapers/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg new file mode 100755 index 0000000000000000000000000000000000000000..09a5e25467b22f226cdaaabd09591eaa1f1d29ea GIT binary patch literal 9722 zcmeHN3s6&M7XEKSke3JoYAst+#m2T5l6!AnH-#_|9!5o7z(?DyG$t2}27*bDh`QNU zx9(`GE861Pj@lO2aa!wWUv{fYTOZhmQw2t}c2A|8pL zH5vZui2g_np9K6MVla{jZG>8g2ZQY+#0WnIqzFooeF$(T30i0;V{uVZhn|I26yeiD z;IWC5i^dmNZJbk@YpdZ(X4aS-oYciRUD9&kl2WsSGr2g6)LJDSKh5s4JEz&JrG^rk zlsZkbfNye2Gpd~yQkpTnx`LFNsw~nBm$f_trobLz%YC0Y7W-5#T!*yCWVTjeorqQ_ zR5}H#Q)r}=&`?nbN&bpZACQkw&HnkwkzK z38n-iNHlgtka#;Zg0M1y=)5Nq1=^9@ed@wJ;zL&ydx=-{9EuY>Ei)7$f0G<4>3bY$|f z*Ei=5OG_#KbSk5CJpZ+!&+z(^3n$vlPSt=nw#VYj2PI4FDU9jVu znp2kwWGfo~{NBqu_n*GJ?)_Z{TCYqjpY_5kfB#_jw?ACf6i%wBUD&Yx!{&o$t|cOo z2&|0|Z0Zv!Q3)nIO!bcd3uX>YlF-jBOU4#_TKx3|CT;2o$Mf}uk`%L3*)}{98_?$w z)n_eW#G3;x`4iMG?qtw}?k$jYn7|)FabjW4M3jxLl-3)EKblr=9A!+aFRnMHt!V6y zF^>P2AH6(z#pxQ)#->R(GYnMS>dDRj*d6=S-dFUKN3Z%I}IM}D_pjJEcy`}B^? z#*vE#>sxuWBzu3;?af23?&Z(pF7#G@-u9j|+4{`s=&r`kvJ z=*vVN*^cw5v6x5ALw+ptPF{MeoJZ|Lx2cO)gh_2bJFDMEo>NP&$azE-w!iD~E(7^g z&j!zpsoCdV<gCHOn*C-1zGFDKn1_Oux47&^;MA+B@c==M3X{X(^8;ymhSM z*;@@>EssFk#hsp`8XlcZF|VID>*nSI%#2Sy*?VQn#_!J_Jp0t)BL^q*Xx{sD)7s`g zIxbkBkze1+qh0IA@@Nx}&Me-adi?g3*6NElwlqI7g>;{(&n#}qf5JfS_Rg9 z7;4>taj@uHKU#B@^;~|f!SKVkKOg-qa!ZOo%7n4OWcjrh0`y-{@9XjOltPF z*B43Wzq81g_R8p?cw^xazdso3d)aOd!<~wV>$syc849G$H5NtSyB|WIrow4LghZ(# z2};EW1F8RHP>N3m_`D!Z7w-6}q6d%|ju7}WHUxfBi}LAegvm@St{1&C`#mdsdbs^D}wd>|~f+ig;c$xx|epu}M* z*A?d#_*FnJOXjz=wzf8-mdUU?D&!Q)vT_A2r|EP+q&sIdK+N-2kr)jFa#+4Kcbl&}%WvIU85WRk@rH2}%y?q|!`_S!e0c zhFQKTa%?W4!Ptj# zyQ9+N(pz!l<ygUKBOSJ1_Udl>)~L@ zbUln~C~U;n@!Kc35@I~|*JX9txSp<7h!@h%_20$xp&<{<=Yxgh-@WG~2jvjV%COqR zBSLZdQWAG{2D{DfD1l1l^i-z2GYhY%z#D6FS!Z#1)(Y6p^o&}oR+35ut5yhmx%d%RGVYL{;vI2&sgf`a33ffo}&&UcoS!@Zdpwy&-1O%xf)ue{hk}L_Kp(v7~ zNs1vUB}u7B08tu}(vmb_X_970T1nC>l2((nh6Ljn#SYx4871!_81 zX~@^+7v!jw8f~u9zlvci7F@^iRySvxoM(mY$qBm}ETzJa?>rJN@??hyO{qggLPlWM z6!wt+DQ&9*UzWSh-QaLY0tb1%EH1KJI6ado?~0AR>EIg;hVc$N{=Tkv78mA9jrj&O z#j4fmbOsbbm$+Z$8+^a-rX=x5h)1}q%eA_k<2gr(sj@nB?hu#z0 zC+b(3DerpTOo1)jWyf$;6|QM|VeRkW;^1~LCs{3k6cVd5GYAYzPFe-ot%CGcLEfua zNOs744dgwPKMkd;feh!tcVAB;P^T-D>6D_BVswl~M}=7sh7BsPTdd`?1LC0$usRhL>Husi0pzcW zuG3Y_b+ZQq;_ED(UCqhyD%Uv79KeTl95f+tMMb~7Je8IHqo#n><#7(UV_OOw_DbRG zT5WPTIXpv_EHH(zpwdjiG@Zq6uEB>^W1haI#%j^AIZUpmK$%ZdN}8hba&py-LYt#u zSPE*)K*2tHha5yVpv_w4beXEmTqkV>cheRkht~?{P*G8+FL(>YX@t35TCAeGpW_a> z4rRtp2Is2S90QfhfHJ0Q_kGGjk!au8zmjTopWvu7!ur@RHep#3B*? zg)0JnB~cPdM1&-!54@R;i;0Va_s;MNxqm|6_{8|w*n|NIiAl*RDJgON1`ZmKJg9$i zN-|bLh(RVo5-pKLC-;r*o804z{|vuqK156t6Yyr%_o8_xN(m5s0=$-fIaH#z{hN(|7%z50=M-2AiyZ|iKuAOb;cXT`61#?o2_l>LWr!#Q((Oc% t*@20=g%H^feNr1O)y}!Tx2*~b|VF`E7umI#2(no+C{x3(`z+eCX literal 0 HcmV?d00001 diff --git a/scrapers/EmbeddedMetadata/test-data/EXIF_camera.jpg b/scrapers/EmbeddedMetadata/test-data/EXIF_camera.jpg new file mode 100755 index 0000000000000000000000000000000000000000..cf503ef31833064f1330e9605b833cd07db31903 GIT binary patch literal 18072 zcmeHuc_39^yZ_$j9AoAwM2;z<3`d4@%o#$Ni;~JbW;&E987dkKAt}jFnusD&QBkHO z3W?D0H768FBf3wY_rCA_{_cDK`Q6sn+Uv8Q^?9E4JkMTx?X}N$N9&}$m31=+ z@pMBFV`CYF5kU|Zgbl?ZL_k?k*#Xl-dk-oHU^ZxLpmL#j1cer>f+*c$+X{Ha5`GC- zdWnMrtO&Zi(Eb6H1egcfFHpI0f73`uc{DK*O!A!qk=3g+&FBlse^JODcb*Rixzh5;43W-EiRH2a6 zC`#miC2=W=QC}7Nf|1y|s zFfzu+%IaVIk^e;=1$MA|@;&gQ0fQN24sZ-$%r^qejo77exd0yq``A3iN)m9e`6LMP zWRP|a_k;n5Aw6W_>%<~%askt^MQlq#0l_Kz?j?% za0y`S8u|mi3k(Y2Fu+V~-`AZ|z#MGf@BbdaVr+1~0H+-=R`kp_nOh--#%2bvC?L99 zHk5zgTEksETq5OQ!;kT#5+d z=!zig+JM%9Aad8gSGqqy#*Z9CrVt%@BvK>&2)I`VDsuuL~2Mf;zF&yn1ZGrZcHcuO-P12szUeju6PY{Gw2|L49?iTJ7JQN;go-;hVc|P+r z@~ZG+`@p|1*i?h35%JF+?70IwP3(Y!dmST0!43<@0n_s5x{CGwv*PdSmx+qQ6-3ch zC^xcz%Ayh|Jt~8;p{(d~bOl^j$$!<4Oe3FwO^4z^lO832H9k}fRxV*UYQieUgfgI9 zC^xihP#IA{lx49p0dF;|R?;v6HypW8c9auFT@7c;P>w}C8Q_Y5goG-9&ho`xRa6(W z_<_NV%EK6<;Ik~OaB?USH9%ERQ<#es)C|TK2ALpuK!#QgxEPBJtKmj;3u=uDfvzAb zhD^e!+~7SciUS5SSXV(cP!kw+EsQ6Nd_)m+8N|p4eH`Gk1o&ixDx#|3vn-6r3Epyn zHZR-CJM#{pw2L%z+xZ&?^m5U!>mYJku{lNU7D zKwlgrqTr<^Smpy3CtO=bKxD8au;|wY)EnIc{W{1uh z0J2>4#~*bExfPr{K$g5fhIwZV^TPs0F#?GjMC1rEZ}8k3&H}*~TNraIR5Q@t3sLR@ zn*m_K9=!Dht|^S*3=BIs+JJmJ;5}e%GhkbY&jBKH2cPX>pP++&fK}rA?(oN+@W&Z~ zm>U`{35i75QUDXHKSkdGNRY+7_98&^^}g%<9dD8So%8+pV^3;hwHDq{{~T2T-p#Bx z>ze87TIqw*PnCZp!~DTkKUx&Fclew4TpQTD{;jJ&Eqw>N)_TUqOESz3ydFS&1USNr zC?MS8W8MrqhrK?wvIfCIjJ@?>ORfP_!je25T?N>BVkISmmZq3dwDmWbY&2P`YH4d}WV7DbWCO~=%F4;kDaOq$wn0H!VZ&c8xc{jES`B&* zLZgE<6$%zAR^Uep0v$aAM7IpCCW^=72zWX=0?Zm%JFp)xIO(`V6?N&kEgc!e0(eN# z$+?W;>x-*-t)6|5P;#QiFfsEj=NAx^TqPyFT4tTHimDo!qNi_QxWUNS#Cr1<8{4hh z?3`U(-P}Dqy#j;w>)uL! z#6nt(=)V_G%>PqFKLh$153LH}WJE%c96Txtn_3jliPByoEO@wiIq{r`Hu7#=gW1{L zPiqFMefrCb6ao@^whUArr>dl8TQ4XN$+X(7OnRPkvTv8o&7({jv$|oo&kW;=ea zI98h`n#_4Thb7KqqMhB(*W;O2=lfT+-&Vb(A!hA@ktKXqg_)bqOWh3Tq#+$ME#`C& z-X6?dpVHMr9!gYf<5jqv%3!9K^^QnF-0BoGwPj*j-})3fA2ht=QMZ|n@QuO5x5Qju z*|_XV%ey-f58vSiUl~hyr@JO+Gc?-AFej?v#f!qi9q!%Scr2v!kx;0`x%nr_mE{dN zS^goTiFracte0dvhb{NrRG8Pxi*z^k@)2p8;?#0Y@^oXv_k( z=zyP4KL?wF?x#odGBrLbMln4v6RHzCo4kytTL?5{y5OR3u6Rx}R+b(n7IzDC z(r0XAR<&J|kQG$35)q1`?{UbwJr}Ue=Cfg!i}_6fDlY$~LcY?=qAj?p=xz>4Xoj&x6Z@V$> zc&Bt%HIkgAbz=Ky$6(~@F=ID1fz+%nHTq@JKm9=HTL6%Xr z8u-K>7I&f4tw|T!X4t`Bw7Fql9bm0YwtoXc&qzK$wccr*kg%yp` zHRE`xr__Kah8Oz|N_QVX1Y(ito#Ba*b}bpIcIo>)O30686Pi?s4djL^tuJj$w;!gu zzR#z`yJWf@F*Q_mQ|YMpbPi-~Idbq-z{{t{ivtfiZ7?q)MJ%Kh7McorPS$mAJ38b! zganJXsTGbilP;~2;_FW#7xY^FVX!(VUuQH7!A>SZ5Jo~Q2){ON8)$bqPb-zVm9|(ZowazvpR27Zm?5Zk#T^R z8agq5<Nn47(_; zIar=9nBuQnB@wkkqJMPN2FWf)tbG;^y zWwnlC9XhrUQYc9{?>8D~w)vTGiP<5^*ipYme@k#SvHh)3g9ybQul9?IVR zxnLk5*pOM%zJtm1l@RmAU{F02z$bVGbz zN2!1FwCVB{Pu#ZraStkaYwU$cg4PzZwLNsl|sUA6-+)MSc?%79Kn^ zW;bb2aDUXlSnhL(P|X*O%QKqld!Oj-)i*n{ONJ~ljlbqQzdtS0r|GIaU*5ZtS=Wd2 z@miKy?6>hCS6qt_aC@>i-CPE>E*Yu0e`wrj4BvN5N|ZP9JyH|2V-wc|cr zeV#!kD(0c*xzr;d!3ETvrS6EFSWQR8=UY!K&lT%+o#c(-)@hNc*+6}g1Iq)UDuFYlZS;7ONJ;whCp z<1iXsKQ!C^jLyY&HuL8G5B+_)F&!_+rk?59>{rxGKN zjF>5Sb|tLcZ4fM$w^7vgIHHhS!Y(vDN<-dV5m$WZ%$R(0`AEe;y+}*>!049uXEW~o zF;GmGKAvNpVjXI+EtHsbF*|};!G^?>q>;9%4|%Ta9%$rwx#8N@!)ec)rxdSGMO{*x z{}7^9&{QYAH)=iGvFy9e-pt7xoyoOQx!WYE5^Ep&NM)h;L>YqEHbE7qD5I_Ao5l-8 z2}Q3Rshy(q%B1tYq|7<@Omr^0InQ(33pKhV@;tk6M821-w)P}%k=O8rBJr-EM-nn; z+OxW<-t5=(s=a#rTXFIVuSv6~$XJZdL`S}6!rSZjHaA2yHRg}j3urdff6{zcTUowr zh2QB53(GgyuT-nsfA{t`?+BaX)uo@L?RSM9Kdi&{RWIsMLjA3g?7AEM&x^)hHyu9S zxU4@>Df^n)xf_Fn;Rh0uNg*jMR#Amx$Cw@n8ATnh37T2Rw06$wJQwPF^&rd2(=fM( zUX>P_dNh?@Kj1I!%&>;KC1hsaR3*g3=QwxS_O|kCD?Da?>xXl=!Bnu9HYQ+Bx&@ zmL1tWKV0MZpn-A6!)4QNJ_+Avo36?nQZHMuT@k16D!_f1Pf^HW-n?aw2Y0{46!%bpgcuHKfsJurVVH%Yjq4c3{> zGv|+w3QP(3CztpU66>>fE!^{RUR6|GQMmD?TQb`vcZ$8?Th8{JgzFC{T!NcrKD4bX z?dEzUvnE34I7{Ntc*n=|iYr3-CIbcDD>N&dlob0A<9YEf`PYv4I+e85mee0yI~dzS zU}4`BXzP0+@0v4T$9jF~wW~$Thq~?&#E(FLQf~ z!kEOxL-d z=i*SASt>c}Zzp(60c^vF_38^%4 zY0$p(>FSj|?oN_~6#5?L;;Xr)kEWE!&CLxlJB%B-Lirz}b#xnf=czfN=NWtP0Tm*z zXoz-4eo#a&ll8s9tAdTSD~x%V8x8Ts`~m`_@q1{<%4y5#P?yvI%lJMd(nQqw!US)N ztBX&F+eh(3^TQ{1Wh){%KK96RVHzShQnbd$Z=jsZZhg1bKIiz4@pDNZ3TORtjo&w& z^?hKfZeCjc;ag?o?whC1wv-*-(6c>#MlAQEYL}Sg`h%MzhW(ACyIAf99^W4Cj7ABw z8f=}?t-maLcT>TIuRbTAa?FP4KaA}(CkpLpz+VfTHK6(&Y9vHEa~Q|s6Nb>Zw!2(foYSYUOyIQ{g9*D;cT;6nlGwRDE3 z@{#rBqgJ({LdnV8QThJWiR@CsvT|+%j$r~bGwO(~n}mObPJeht@JVOJ8NJ3e#_G`x z^Wwj-pzB8gfTm zy}Hg!bC%bXNpm9&@#Y#CHx4A>woAAqnos6i(kC-M^p6P3WdC@<*|zB7%~Xk-V`7;? z4p%fInV*+F7w+21b%?5%g3Ph?u2${5{Mqqp%jLUy?&hbh0;Siu62~_QB}RPQQntNe z{JC=vV$&cQn5369m7HxC_vPe(Gim#2)23)n@kD)Qh8>-C8}5_GMp|=I=9!i?9Eu_i z#75q!{Khajw>ez7rD^@ia~su9^}Wp5c((are0ac^e();oETiYA$PM-W0WJNkR%S0$ z2TeOOQx3$v?&Dwe*UFFBgQ{B>Jspy1<_~^ui%yt7ix3G&7*OPnG z6ZEg@#7Aw-6iS=<+_Em#Wh0g6PQk(MDya!J8qzmvT5D*Sp}c=!MZuvIi$m!Fz9FL31I(!?&+r!bf!Daztf3hr8hlU%e-tlZDc= z$ZtVs47R3=a95(X@v$h&Vf`bSIO?w9@Xgu|hh;;P ziL5U&878$m=Ztn=DimapHBd1ItDLb=(dXS*qOlCO08&Qu(GZgq zVXGnM{Po)$iFc%{_-8{`-WD%&@VRq6BXZ7Q_>e<#avokb?j;3h$d2Eu6wmVY`PT$y z##5!O@7KRy??AmYex+KIOoshun8mAdzHWgRCoQY?eVoxltED_NKh9c79y5!z&|z{vjmaxmTl2x>k%0>DIOONeUw(cSsw%Z=+=MxveXHT_e-Z`;0k!d`Sy7il9pjPee zZfaGm7S!S|#92{3`>^pIp%-@$iwGiH}`c=-y)sUqsD&*b% z>~tDZP#o@SQyb#E(c!3@=)D<%x~Dnl#0DL^ z($&EX8(SMkroiSu6cqfKKG6{7uGXQRWBj7=#1j@IyEUKZoJHd9T7@4qtg$&cV=9rf ze1glM=5Qzxsam)-FH32B@Mg5!(N1Cun~MWu@1TATz9*q)fxb9whhm{+nrDD!9%&8L zha}fjr&M;JpL=$tx0Uxn>HPa*V~tm|cP-rYNUJ`vs=_wy2L6;2T4JcWfBzTlLmPRn zH{1&|p3F{*=C7W~-*LpNPGjV@e{!xwboCo%hSLI7oA592pT0@&byrIGEGU4y8jH&7 zM=M6mGCr&lV_<9QU7p-6vg^Z@vQNqv)6NKOs~boNp)&5Q8BrNH`tqs40<+)8dw=0c~&>Xs7^wG=7MM#y@1on3vAF?wao0IN_u z!MAH!W?vQ!3BTV_-ofwF+~8{D>qvF*Wa=u-e3NyH_+}wO=W=RkjH#IEvK>c!<@!8M zb{fllh#G%sa#^U4u&;8zc$uoja|u=Rd*&%yqdM6y28*;?%J`kKO!3y^%_%=-ZhKsS z(V{P)-}(CImZ{6*J2LcZbURbn+F~wcx?jtsA*;#)I;sUc173RHWN0+A9TXpr{$|eW z<{H;{Ttjb-s>-uR^O~OcV5PpTb-UD@+xw@iWcmmV%tMj^Bhy!&4F|3b65T_($-i3Q z5x(uhMz$Pr+z1hNLOEEW*AUQ4J->Os#xJItbSr&?Nv9giVotTA@Rn#pKLsZXvn`?P_>dugZ%x zSYEhVOtQL92;cef^|hhdo_n%KNo({3&-?5+AAVBYe+ZUB6ZutLgs2UVhMC5!uXy&f zoFK*DdzKoHuitt9a>307{f5KUmN#M;n6>V~Hvg`>W$AeQ%J@T@u7nP>_!jRWj`w|- z7Wo!79CSs3`Qt+(qnP9ge|mKG_C)IuN$CPXgJgU2-EP!kdMde-q&t$-Ch{$C+Y41i z6PEo}gn;DRq1_Et~B~!zBXy;b;#)}Dg77qM! zNAI$2uo-&6k53xda;~iTNa^#q4GtDYS5~XmY9|elAg) znOrC0Vz9x_6?+jXi6;_rvS#p@>0Y_N|3jvl(CPayDNo)eeS#2&u=$TQAti&Sqh9$UEC;E z`i4s;;7n6$$yfMDODl-Kz7MO{PHuPNs zovEJwL7woLEoRfv$!|}Prj!&jamlo)r!&9x5uYDDi&^ol$qk0xEPPL;2mj~*hy6hz zz#knW_RSF;GXn!XBMUPN%QEI=%UIdCI9S;@*_JKi;N#%r=Hcb#Wno{=&&R{h#ly>k z8A0)&L!e`#qhsP>UB=4umkaFyd~Soj!A0Rw_*m%s=QcI)@7K_upGSTFs0d-C)gU~7 zCq;gIo5O*J58cqTCGzj)cLsiE;CBXoXW(}R{!eBg5_?*5{O4zA6+A5&;Tek3;rJLZ z2IoI!z!$yR=s#uP{$%`@qtI(Fjq+bIP;F5dW$EkM#ZTHfQj8^j|NYLu?+pCT!0!zF a&cOfK4E&3yGsnMtIxjK)$+H(@bo~eJ>-CcW literal 0 HcmV?d00001 diff --git a/scrapers/EmbeddedMetadata/test_config.py b/scrapers/EmbeddedMetadata/test_config.py new file mode 100644 index 000000000..f1a55e108 --- /dev/null +++ b/scrapers/EmbeddedMetadata/test_config.py @@ -0,0 +1,3 @@ +skip_ensure_requirements = True + +FORCE_METHOD = None \ No newline at end of file diff --git a/scrapers/EmbeddedMetadata/test_embedded_metadata.py b/scrapers/EmbeddedMetadata/test_embedded_metadata.py new file mode 100644 index 000000000..aba511a81 --- /dev/null +++ b/scrapers/EmbeddedMetadata/test_embedded_metadata.py @@ -0,0 +1,67 @@ +import os +import sys +import unittest + +import embedded_metadata + +import test_config + + +class TestEmbeddedMetadataExiftool(unittest.TestCase): + + @classmethod + def setUpClass(cls): + test_config.FORCE_METHOD = "exiftool" + + def test_EXIF_IPTC_XMP_keywords_title(self): + data = embedded_metadata.process_image(os.path.join(os.path.dirname(__file__), 'test-data/EXIF_IPTC_XMP_keywords_title.jpg')) + + self.assertSetEqual({'Title', 'Date', 'Tags', 'Details'}, set(data.keys())) + self.assertEqual("Blue Square Test File - .jpg", data['Title']) + self.assertEqual("2005-09-07", data['Date']) + self.assertListEqual( + [{"Name": "XMP"}, {"Name": "Blue Square"}, {"Name": "test file"}, {"Name": "Photoshop"}, {"Name": ".jpg"}], + data['Tags'] + ) + self.assertEqual(('XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' + '.jpg, and .tif.\n' + 'XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' + '.jpg, and .tif.\n' + '\n'), data['Details']) + + def test_EXIF_camera(self): + data = embedded_metadata.process_image(os.path.join(os.path.dirname(__file__), 'test-data/EXIF_camera.jpg')) + + self.assertSetEqual(set(data.keys()), {'Date'}) + self.assertEqual("2008-11-01", data['Date']) # not the same as pyexiv2, exiftool does not return exif.DateTime + + +class TestEmbeddedMetadataPyexiv2tool(unittest.TestCase): + + @classmethod + def setUpClass(cls): + test_config.FORCE_METHOD = "pyexiv2" + + def test_EXIF_IPTC_XMP_keywords_title(self): + data = embedded_metadata.process_image(os.path.join(os.path.dirname(__file__), 'test-data/EXIF_IPTC_XMP_keywords_title.jpg')) + + self.assertSetEqual({'Title', 'Date', 'Tags', 'Details'}, set(data.keys())) + self.assertEqual("Blue Square Test File - .jpg", data['Title']) + self.assertEqual("2005-09-07", data['Date']) + self.assertListEqual( + [{"Name": "XMP"}, {"Name": "Blue Square"}, {"Name": "test file"}, {"Name": "Photoshop"}, {"Name": ".jpg"}], + data['Tags'] + ) + self.assertEqual(('XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' + '.jpg, and .tif.\n' + 'XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' + '.jpg, and .tif.\n' + 'XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' + '.jpg, and .tif.\n' + '\n'), data['Details']) + + def test_EXIF_camera(self): + data = embedded_metadata.process_image(os.path.join(os.path.dirname(__file__), 'test-data/EXIF_camera.jpg')) + + self.assertSetEqual(set(data.keys()), {'Date'}) + self.assertEqual("2008-10-22", data['Date']) From d46d2fcdba054a895b145ad057230349a5acf4fb Mon Sep 17 00:00:00 2001 From: spaceyuck Date: Sat, 9 May 2026 01:02:57 +0200 Subject: [PATCH 3/4] [EmbeddedMetadata] added support for XMP keywords as tags, IPTC ObjectName as title --- .../EmbeddedMetadata/embedded_metadata.py | 6 ++++-- .../EXIF_IPTC_XMP_keywords_title.jpg | Bin 9722 -> 9436 bytes .../test_embedded_metadata.py | 11 +++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/scrapers/EmbeddedMetadata/embedded_metadata.py b/scrapers/EmbeddedMetadata/embedded_metadata.py index 703f4ffa5..12218332e 100755 --- a/scrapers/EmbeddedMetadata/embedded_metadata.py +++ b/scrapers/EmbeddedMetadata/embedded_metadata.py @@ -174,7 +174,9 @@ def process_image(image_path: str): # title # title_details = '' - for field in ['Exif.Image.DocumentName', 'EXIF:DocumentName', 'Exif.Image.XPTitle', 'EXIF:XPTitle', 'Exif.Photo.ImageTitle', 'EXIF:ImageTitle', 'Iptc.Application2.Headline', 'IPTC:Headline','Xmp.dc.title', 'XMP:Title', 'Xmp.dc.headline', 'XMP:Headline']: + for field in ['Exif.Image.DocumentName', 'EXIF:DocumentName', 'Exif.Image.XPTitle', 'EXIF:XPTitle', 'Exif.Photo.ImageTitle', 'EXIF:ImageTitle', + 'Iptc.Application2.Headline', 'IPTC:Headline', 'Xmp.dc.title', 'XMP:Title', 'Xmp.dc.headline', 'XMP:Headline', + 'Iptc.Application2.ObjectName', 'IPTC:ObjectName']: if field not in data: continue value = data[field] @@ -235,7 +237,7 @@ def process_image(image_path: str): # # tags # - for field in ['Exif.Image.XPKeywords', 'EXIF:XPKeywords', 'Iptc.Application2.Keywords', 'IPTC:Keywords']: + for field in ['Exif.Image.XPKeywords', 'EXIF:XPKeywords', 'Iptc.Application2.Keywords', 'IPTC:Keywords', 'Xmp.pdf.Keywords', 'XMP:Keywords']: if field not in data: continue value = data[field] diff --git a/scrapers/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg b/scrapers/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg index 09a5e25467b22f226cdaaabd09591eaa1f1d29ea..eca3c3dd665fdeff86a98bb8e746f7e05d6d02a7 100755 GIT binary patch delta 372 zcmez6eaCZxc>OHbip(?y247zWEd~Y#4hAj;ZU!bGOBjgT82P~L2q1eFqYwi#kY)rz z21Y3cRxq1^;S-}Yoc)VY1F8mS0;3^VuN;uY#ApO&ivZaQOu`6r+B20HSRe*5ngAus zvOum0$Lm6CK2TFEL6pumU-ZKzxf) zgMk^$1}b4R1S^yS@*go8f!QJ;WlX|QH9#HhnG7rty^J8au!%dqsxCno$hZdCKqf}G z;w_smF}`DG5<59LkXvN)HBM

y*^uXwn8#}@=GptEd?8( zU~y?uR%$ZD2C#KbiRmcT*+uvUOuoP`#_g0-nyL_7SejUr3gUB?q!yPbq-EwncmWyt zCHch}`2`@kbiJ&CbS$ocnhkahlEol*lw_9Vq>}3C$+tLUl(E=@>}!z0lM99IH+ysM QVA?#7?+erBV}fGB00eA{&;S4c diff --git a/scrapers/EmbeddedMetadata/test_embedded_metadata.py b/scrapers/EmbeddedMetadata/test_embedded_metadata.py index aba511a81..fbe1ab18d 100644 --- a/scrapers/EmbeddedMetadata/test_embedded_metadata.py +++ b/scrapers/EmbeddedMetadata/test_embedded_metadata.py @@ -20,13 +20,12 @@ def test_EXIF_IPTC_XMP_keywords_title(self): self.assertEqual("Blue Square Test File - .jpg", data['Title']) self.assertEqual("2005-09-07", data['Date']) self.assertListEqual( - [{"Name": "XMP"}, {"Name": "Blue Square"}, {"Name": "test file"}, {"Name": "Photoshop"}, {"Name": ".jpg"}], + [{"Name": "XMP"}, {"Name": "Blue Square"}, {"Name": "test file"}, {"Name": "Photoshop"}, {"Name": ".jpg"}, + {'Name': 'XMP keyword 1'},{'Name': 'XMP keyword 2'}], data['Tags'] ) self.assertEqual(('XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' '.jpg, and .tif.\n' - 'XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' - '.jpg, and .tif.\n' '\n'), data['Details']) def test_EXIF_camera(self): @@ -49,15 +48,15 @@ def test_EXIF_IPTC_XMP_keywords_title(self): self.assertEqual("Blue Square Test File - .jpg", data['Title']) self.assertEqual("2005-09-07", data['Date']) self.assertListEqual( - [{"Name": "XMP"}, {"Name": "Blue Square"}, {"Name": "test file"}, {"Name": "Photoshop"}, {"Name": ".jpg"}], + [{"Name": "XMP"}, {"Name": "Blue Square"}, {"Name": "test file"}, {"Name": "Photoshop"}, {"Name": ".jpg"}, + {'Name': 'XMP keyword 1'},{'Name': 'XMP keyword 2'} + ], data['Tags'] ) self.assertEqual(('XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' '.jpg, and .tif.\n' 'XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' '.jpg, and .tif.\n' - 'XMPFiles BlueSquare test file, created in Photoshop CS2, saved as .psd, ' - '.jpg, and .tif.\n' '\n'), data['Details']) def test_EXIF_camera(self): From 7ce771305ccb6d550b9d65f775c48ae27499446c Mon Sep 17 00:00:00 2001 From: spaceyuck Date: Fri, 15 May 2026 08:40:11 +0200 Subject: [PATCH 4/4] [EmbeddedMetadata] moved tests and test data to subfolder of scraper-tests --- scraper-tests/EmbeddedMetadata/config.py | 7 +++++++ .../test-data/EXIF_IPTC_XMP_keywords_title.jpg | Bin .../EmbeddedMetadata/test-data/EXIF_camera.jpg | Bin .../EmbeddedMetadata/test_embedded_metadata.py | 16 +++++++++++----- scrapers/EmbeddedMetadata/embedded_metadata.py | 9 +-------- scrapers/EmbeddedMetadata/test_config.py | 3 --- 6 files changed, 19 insertions(+), 16 deletions(-) create mode 100644 scraper-tests/EmbeddedMetadata/config.py rename {scrapers => scraper-tests}/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg (100%) rename {scrapers => scraper-tests}/EmbeddedMetadata/test-data/EXIF_camera.jpg (100%) rename {scrapers => scraper-tests}/EmbeddedMetadata/test_embedded_metadata.py (79%) delete mode 100644 scrapers/EmbeddedMetadata/test_config.py diff --git a/scraper-tests/EmbeddedMetadata/config.py b/scraper-tests/EmbeddedMetadata/config.py new file mode 100644 index 000000000..cdac14e88 --- /dev/null +++ b/scraper-tests/EmbeddedMetadata/config.py @@ -0,0 +1,7 @@ +# +# gets imported when running scraper from test instead of optional config.py in scraper directory +# + +skip_ensure_requirements = True + +FORCE_METHOD = None \ No newline at end of file diff --git a/scrapers/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg b/scraper-tests/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg similarity index 100% rename from scrapers/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg rename to scraper-tests/EmbeddedMetadata/test-data/EXIF_IPTC_XMP_keywords_title.jpg diff --git a/scrapers/EmbeddedMetadata/test-data/EXIF_camera.jpg b/scraper-tests/EmbeddedMetadata/test-data/EXIF_camera.jpg similarity index 100% rename from scrapers/EmbeddedMetadata/test-data/EXIF_camera.jpg rename to scraper-tests/EmbeddedMetadata/test-data/EXIF_camera.jpg diff --git a/scrapers/EmbeddedMetadata/test_embedded_metadata.py b/scraper-tests/EmbeddedMetadata/test_embedded_metadata.py similarity index 79% rename from scrapers/EmbeddedMetadata/test_embedded_metadata.py rename to scraper-tests/EmbeddedMetadata/test_embedded_metadata.py index fbe1ab18d..9d1836ba0 100644 --- a/scrapers/EmbeddedMetadata/test_embedded_metadata.py +++ b/scraper-tests/EmbeddedMetadata/test_embedded_metadata.py @@ -2,19 +2,24 @@ import sys import unittest +# add scraper directory to loadable module path +sys.path.insert(0, os.path.join(os.path.dirname(__file__),'..','..','scrapers', 'EmbeddedMetadata')) +# add py_common path to module path +sys.path.insert(1, os.path.join(os.path.dirname(__file__),'..','..','scrapers')) import embedded_metadata -import test_config +import config class TestEmbeddedMetadataExiftool(unittest.TestCase): @classmethod def setUpClass(cls): - test_config.FORCE_METHOD = "exiftool" + config.FORCE_METHOD = "exiftool" def test_EXIF_IPTC_XMP_keywords_title(self): - data = embedded_metadata.process_image(os.path.join(os.path.dirname(__file__), 'test-data/EXIF_IPTC_XMP_keywords_title.jpg')) + data = embedded_metadata.process_image(os.path.join(os.path.dirname(__file__), + 'test-data/EXIF_IPTC_XMP_keywords_title.jpg')) self.assertSetEqual({'Title', 'Date', 'Tags', 'Details'}, set(data.keys())) self.assertEqual("Blue Square Test File - .jpg", data['Title']) @@ -39,10 +44,11 @@ class TestEmbeddedMetadataPyexiv2tool(unittest.TestCase): @classmethod def setUpClass(cls): - test_config.FORCE_METHOD = "pyexiv2" + config.FORCE_METHOD = "pyexiv2" def test_EXIF_IPTC_XMP_keywords_title(self): - data = embedded_metadata.process_image(os.path.join(os.path.dirname(__file__), 'test-data/EXIF_IPTC_XMP_keywords_title.jpg')) + data = embedded_metadata.process_image(os.path.join(os.path.dirname(__file__), + 'test-data/EXIF_IPTC_XMP_keywords_title.jpg')) self.assertSetEqual({'Title', 'Date', 'Tags', 'Details'}, set(data.keys())) self.assertEqual("Blue Square Test File - .jpg", data['Title']) diff --git a/scrapers/EmbeddedMetadata/embedded_metadata.py b/scrapers/EmbeddedMetadata/embedded_metadata.py index 12218332e..6f2c488d4 100755 --- a/scrapers/EmbeddedMetadata/embedded_metadata.py +++ b/scrapers/EmbeddedMetadata/embedded_metadata.py @@ -1,7 +1,5 @@ from datetime import datetime -import hashlib import json -import os import sys from py_common import graphql, log @@ -12,12 +10,7 @@ try: import config except: - # is unit test -> load test config - if 'unittest' in sys.modules: - import test_config - config = test_config - else: - config = object() + config = object() skip_ensure_requirements = config.skip_ensure_requirements if hasattr(config, 'skip_ensure_requirements') else False diff --git a/scrapers/EmbeddedMetadata/test_config.py b/scrapers/EmbeddedMetadata/test_config.py deleted file mode 100644 index f1a55e108..000000000 --- a/scrapers/EmbeddedMetadata/test_config.py +++ /dev/null @@ -1,3 +0,0 @@ -skip_ensure_requirements = True - -FORCE_METHOD = None \ No newline at end of file