Skip to content

Commit

Permalink
XWIKI-20715: Don't use user-provided filenames in office import
Browse files Browse the repository at this point in the history
* Add a method to DocumentAccessBridge for setting the attachment
  content from an input stream
* Apply some basic cleaning to filenames in OfficeConverterFileStorage
* Use fixed filenames for input/output in the office importer and only
  keep the extension.
* Introduce an interface OfficeDocumentArtifact to represent artifacts
  with two implementations that are backed by a file or a byte array,
  respectively.
* Provide a map between true filename and the OfficeDocumentArtifact
  instead of just a set of files in the conversion result.
* Adapt code using the changed APIs and tests.
* Deprecate methods that used the `Set<File>` API for artifacts and move
  them to legacy.
* Replace the static prefix on images by the name of the input file.
* Add a test for the image prefix replacement.
* Use a real XHTMLMarkerResourceReferenceSerializer in ImageFilterTest
  as it simplifies the tests (and adds just a single class as
  dependency).
  • Loading branch information
michitux committed Mar 31, 2023
1 parent e4be3cf commit 45d182a
Show file tree
Hide file tree
Showing 33 changed files with 836 additions and 235 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.NotImplementedException;
import org.xwiki.component.annotation.Role;
import org.xwiki.model.EntityType;
Expand Down Expand Up @@ -539,6 +540,23 @@ default InputStream getAttachmentContent(EntityReference reference) throws Excep
*/
void setAttachmentContent(AttachmentReference attachmentReference, byte[] attachmentData) throws Exception;

/**
* Sets the content of a document attachment. If the document or the attachment does not exist, both will be created
* newly.
*
* @param attachmentReference the name of the attachment to access
* @param attachmentData Attachment content.
* @throws Exception If the storage cannot be accessed.
* @since 14.10.8
* @since 15.3RC1
*/
@Unstable
default void setAttachmentContent(AttachmentReference attachmentReference, InputStream attachmentData)
throws Exception
{
setAttachmentContent(attachmentReference, IOUtils.toByteArray(attachmentData));
}

/**
* Sets the content of a document attachment. If the document or the attachment does not exist, both will be created
* newly.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
*/
package org.xwiki.officeimporter.document;

import java.io.File;
import java.util.Collections;
import java.util.Map;
import java.util.Set;

public interface CompatibilityOfficeDocument
{
Expand All @@ -30,8 +33,25 @@ public interface CompatibilityOfficeDocument
* as artifacts.
*
* @return a map containing artifacts for this document.
* @deprecated Since 13.1RC1 use {@link OfficeDocument#getArtifactsFiles()}.
* @deprecated Since 13.1RC1 use {@link #getArtifactsFiles()}.
*/
@Deprecated
Map<String, byte[]> getArtifacts();

/**
* Returns the files corresponding to all the artifacts for this office document, except the conversion of the
* document itself.
* Artifacts are generated during the import operation if the original office document contains embedded
* non-textual elements. Also, some office formats (like presentations) result in multiple output files when
* converted into html. In this case all these output files will be considered as artifacts.
*
* @return the set of artifacts related to this office document.
* @since 13.1RC1
* @deprecated Use {@link OfficeDocument#getArtifactsMap()} instead.
*/
@Deprecated(since = "15.3RC1, 14.10.8")
default Set<File> getArtifactsFiles()
{
return Collections.emptySet();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,31 @@
package org.xwiki.officeimporter.document;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.xwiki.component.manager.ComponentManager;
import org.xwiki.officeimporter.OfficeImporterException;
import org.xwiki.officeimporter.converter.OfficeConverterResult;
import org.xwiki.officeimporter.internal.document.ByteArrayOfficeDocumentArtifact;
import org.xwiki.officeimporter.internal.document.FileOfficeDocumentArtifact;
import org.xwiki.rendering.block.XDOM;

public privileged aspect XDOMOfficeDocumentCompatibilityAspect
{
@Deprecated
private Map<String, byte[]> XDOMOfficeDocument.artifacts;

@Deprecated
private Set<File> XDOMOfficeDocument.fileArtifacts;

/**
* Creates a new {@link XDOMOfficeDocument}.
*
Expand All @@ -48,10 +56,33 @@ public privileged aspect XDOMOfficeDocumentCompatibilityAspect
@Deprecated
public XDOMOfficeDocument.new(XDOM xdom, Map<String, byte[]> artifacts, ComponentManager componentManager)
{
this(xdom, Collections.emptySet(), componentManager, null);
this(xdom, artifacts.entrySet().stream()
.map(entry -> new ByteArrayOfficeDocumentArtifact(entry.getKey(), entry.getValue()))
.collect(Collectors.toMap(ByteArrayOfficeDocumentArtifact::getName, Function.identity())),
componentManager, null);
this.artifacts = artifacts;
}

/**
* Creates a new {@link XDOMOfficeDocument}.
*
* @param xdom {@link XDOM} corresponding to office document content.
* @param artifactFiles artifacts for this office document.
* @param componentManager {@link ComponentManager} used to lookup for various renderers.
* @param converterResult the {@link OfficeConverterResult} used to build that object.
* @since 13.1RC1
* @deprecated Use {@link #XDOMOfficeDocument(XDOM, Map, ComponentManager, OfficeConverterResult)} instead.
*/
@Deprecated(since = "14.10.8, 15.3RC1")
public XDOMOfficeDocument.new(XDOM xdom, Set<File> artifactFiles, ComponentManager componentManager,
OfficeConverterResult converterResult)
{
this(xdom, artifactFiles.stream().collect(Collectors.toMap(File::getName,
file -> new FileOfficeDocumentArtifact(file.getName(), file))),
componentManager, converterResult);
this.fileArtifacts = artifactFiles;
}

/**
* Overrides {@link CompatibilityOfficeDocument#getArtifacts()}.
*/
Expand All @@ -60,20 +91,26 @@ public privileged aspect XDOMOfficeDocumentCompatibilityAspect
{
if (this.artifacts == null) {
this.artifacts = new HashMap<>();
FileInputStream fis = null;

for (File file : this.artifactFiles) {
try {
fis = new FileInputStream(file);
this.artifacts.put(file.getName(), IOUtils.toByteArray(fis));
} catch (IOException e) {
for (Map.Entry<String, OfficeDocumentArtifact> mapItem : this.artifactsMap.entrySet()) {
OfficeDocumentArtifact artifact = mapItem.getValue();
String fileName = mapItem.getKey();
try (InputStream is = artifact.getContentInputStream()) {
this.artifacts.put(fileName, IOUtils.toByteArray(is));
} catch (OfficeImporterException | IOException e) {
// FIXME
e.printStackTrace();
} finally {
IOUtils.closeQuietly(fis);
}
}
}
return this.artifacts;
}

/**
* Overrides {@link CompatibilityOfficeDocument#getArtifactsFiles()}.
*/
@Deprecated
public Set<File> XDOMOfficeDocument.getArtifactsFiles()
{
return this.fileArtifacts != null ? this.fileArtifacts : Collections.emptySet();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,30 @@
package org.xwiki.officeimporter.document;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.w3c.dom.Document;
import org.xwiki.officeimporter.OfficeImporterException;
import org.xwiki.officeimporter.converter.OfficeConverterResult;
import org.xwiki.officeimporter.internal.document.ByteArrayOfficeDocumentArtifact;
import org.xwiki.officeimporter.internal.document.FileOfficeDocumentArtifact;

public privileged aspect XHTMLOfficeDocumentCompatibilityAspect
{
@Deprecated
private Map<String, byte[]> XHTMLOfficeDocument.artifacts;

@Deprecated
private Set<File> XHTMLOfficeDocument.fileArtifacts;

/**
* Creates a new {@link XHTMLOfficeDocument}.
*
Expand All @@ -46,10 +54,31 @@ public privileged aspect XHTMLOfficeDocumentCompatibilityAspect
@Deprecated
public XHTMLOfficeDocument.new(Document document, Map<String, byte[]> artifacts)
{
this(document, Collections.emptySet(), null);
this(document, artifacts.entrySet().stream()
.map(entry -> new ByteArrayOfficeDocumentArtifact(entry.getKey(), entry.getValue()))
.collect(Collectors.toMap(ByteArrayOfficeDocumentArtifact::getName, Function.identity())),
null);
this.artifacts = artifacts;
}

/**
* Creates a new {@link XHTMLOfficeDocument}.
*
* @param document the w3c dom representing the office document.
* @param artifactFiles artifacts for this office document.
* @param converterResult the {@link OfficeConverterResult} used to build that object.
* @since 13.1RC1
* @deprecated Use {@link #XHTMLOfficeDocument(Document, Map, OfficeConverterResult)} instead.
*/
@Deprecated(since = "14.10.8, 15.3RC1")
public XHTMLOfficeDocument.new(Document document, Set<File> artifactFiles, OfficeConverterResult converterResult)
{
this(document, artifactFiles.stream().collect(Collectors.toMap(File::getName,
file -> new FileOfficeDocumentArtifact(file.getName(), file))),
converterResult);
this.fileArtifacts = artifactFiles;
}

/**
* Overrides {@link CompatibilityOfficeDocument#getArtifacts()}.
*/
Expand All @@ -58,20 +87,26 @@ public privileged aspect XHTMLOfficeDocumentCompatibilityAspect
{
if (this.artifacts == null) {
this.artifacts = new HashMap<>();
FileInputStream fis = null;

for (File file : this.artifactFiles) {
try {
fis = new FileInputStream(file);
this.artifacts.put(file.getName(), IOUtils.toByteArray(fis));
} catch (IOException e) {
for (Map.Entry<String, OfficeDocumentArtifact> mapItem : this.artifactsMap.entrySet()) {
OfficeDocumentArtifact artifact = mapItem.getValue();
String fileName = mapItem.getKey();
try (InputStream is = artifact.getContentInputStream()) {
this.artifacts.put(fileName, IOUtils.toByteArray(is));
} catch (OfficeImporterException | IOException e) {
// FIXME
e.printStackTrace();
} finally {
IOUtils.closeQuietly(fis);
}
}
}
return this.artifacts;
}

/**
* Overrides {@link CompatibilityOfficeDocument#getArtifactsFiles()}.
*/
@Deprecated(since = "14.10.8, 15.3RC1")
public Set<File> XHTMLOfficeDocument.getArtifactsFiles()
{
return this.fileArtifacts != null ? this.fileArtifacts : Collections.emptySet();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
package org.xwiki.officeimporter.document;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Set;
import java.util.Map;

import org.xwiki.officeimporter.converter.OfficeConverterResult;
import org.xwiki.stability.Unstable;

/**
* Represents an office document being imported.
Expand Down Expand Up @@ -57,13 +57,17 @@ public interface OfficeDocument extends Closeable
* Artifacts are generated during the import operation if the original office document contains embedded
* non-textual elements. Also, some office formats (like presentations) result in multiple output files when
* converted into html. In this case all these output files will be considered as artifacts.
* <p>
* The key of the map is the artifact name.
*
* @return the set of artifacts related to this office document.
* @since 13.1RC1
* @return the map of artifact names to artifacts related to this office document
* @since 14.10.8
* @since 15.3RC1
*/
default Set<File> getArtifactsFiles()
@Unstable
default Map<String, OfficeDocumentArtifact> getArtifactsMap()
{
return Collections.emptySet();
return Collections.emptyMap();
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.officeimporter.document;

import java.io.InputStream;

import org.xwiki.officeimporter.OfficeImporterException;
import org.xwiki.stability.Unstable;

/**
* An artifact for an office document.
*
* @version $Id$
* @since 14.10.8
* @since 15.3RC1
*/
@Unstable
public interface OfficeDocumentArtifact
{
/**
* @return the name of the artifact
*/
String getName();

/**
* @return an input stream for reading the artifact's content
*/
InputStream getContentInputStream() throws OfficeImporterException;
}
Loading

0 comments on commit 45d182a

Please sign in to comment.