Skip to content

Commit

Permalink
Add checks for duplicate ZIP entries. Fixes issue 265
Browse files Browse the repository at this point in the history
  • Loading branch information
rdeltour committed May 24, 2013
1 parent fcbb96d commit 429a4a3
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 25 deletions.
18 changes: 18 additions & 0 deletions src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java
Expand Up @@ -24,11 +24,15 @@

import java.io.IOException;
import java.io.InputStream;
import java.text.Normalizer;
import java.text.Normalizer.Form;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.opf.OPFChecker;
Expand Down Expand Up @@ -233,8 +237,22 @@ public void runChecks() {
}
}



// Check all file and directory entries in the container
try {
// Check that the container does not contain duplicate entries
Set<String> entriesSet = new HashSet<String>();
Set<String> normalizedEntriesSet = new HashSet<String>();
for (String entry : ocf.getEntries()) {
if (!entriesSet.add(entry)) {
report.error(null, -1, -1, "Duplicate entry in the ZIP file: "+entry);
} else if (!normalizedEntriesSet.add(Normalizer.normalize(entry, Form.NFC))) {
report.warning(null, -1, -1, "Duplicate entry in the ZIP file (after Unicode NFC normalization): "+entry);
}
}


for (String entry : ocf.getFileEntries()) {
if (!entry.startsWith("META-INF/")
&& !entry.startsWith("META-INF\\")
Expand Down
13 changes: 10 additions & 3 deletions src/main/java/com/adobe/epubcheck/ocf/OCFPackage.java
Expand Up @@ -4,9 +4,10 @@
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.opf.OPFData;
Expand Down Expand Up @@ -51,19 +52,25 @@ public String getUniqueIdentifier() {
*/
public abstract InputStream getInputStream(String name) throws IOException;

/**
 * Lists the names of the entries in this container.
 * <p>
 * Unlike {@link #getFileEntries()} and {@link #getDirectoryEntries()}, which
 * return sets, the result is a list so that repeated names are preserved and
 * callers can detect them.
 *
 * @return a list of all the entries in this container. May contain duplicate entries (which is invalid in EPUB).
 * @throws IOException presumably if the underlying container cannot be read
 *         (NOTE(review): confirm against the concrete implementations)
 */
public abstract List<String> getEntries() throws IOException;

/**
*
* @return a set of relative file names of files in this container
* @throws IOException
*/
public abstract HashSet<String> getFileEntries() throws IOException;
public abstract Set<String> getFileEntries() throws IOException;

/**
*
* @return a set of relative directory entries in this container
* @throws IOException
*/
public abstract HashSet<String> getDirectoryEntries() throws IOException;
public abstract Set<String> getDirectoryEntries() throws IOException;


/**
Expand Down
65 changes: 43 additions & 22 deletions src/main/java/com/adobe/epubcheck/ocf/OCFZipPackage.java
Expand Up @@ -2,19 +2,43 @@

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

public class OCFZipPackage extends OCFPackage {

ZipFile zip;
private ZipFile zip;
private List<String> allEntries = null;
private Set<String> fileEntries;
private Set<String> dirEntries;

/**
 * Creates an OCF package backed by the given ZIP file.
 *
 * @param zip the ZIP file whose entries this package exposes
 */
public OCFZipPackage(ZipFile zip) {
    // The no-argument superclass constructor is invoked implicitly.
    this.zip = zip;
}

/**
 * Enumerates the ZIP file once and caches the names of its entries in
 * {@code allEntries} (every entry, in enumeration order, duplicates kept),
 * {@code fileEntries} (non-directory entries) and {@code dirEntries}
 * (directory entries).
 * <p>
 * The collections are built in local variables and only published to the
 * fields once the enumeration has completed. The getters use
 * {@code allEntries == null} as the "not listed yet" marker, so assigning the
 * fields up front would leave an empty, seemingly valid cache behind if
 * {@code zip.entries()} failed (for instance because the ZIP file has been
 * closed), and later calls would silently report an empty container.
 *
 * @throws IOException declared for callers; not thrown by the code visible here
 */
private void listEntries() throws IOException {
    synchronized (zip) {
        List<String> all = new LinkedList<String>();
        Set<String> files = new HashSet<String>();
        Set<String> dirs = new HashSet<String>();
        for (Enumeration<? extends ZipEntry> entries = zip.entries(); entries.hasMoreElements();) {
            ZipEntry entry = entries.nextElement();
            String name = entry.getName();
            all.add(name);
            if (entry.isDirectory()) {
                dirs.add(name);
            } else {
                files.add(name);
            }
        }
        // Publish allEntries last: it doubles as the "cache is ready" flag.
        fileEntries = files;
        dirEntries = dirs;
        allEntries = all;
    }
}

/* (non-Javadoc)
* @see com.adobe.epubcheck.ocf.OCFPackage#hasEntry(java.lang.String)
Expand Down Expand Up @@ -50,39 +74,36 @@ public InputStream getInputStream(String name) throws IOException {
return filter.decrypt(in);
return null;
}

/* (non-Javadoc)
 * @see com.adobe.epubcheck.ocf.OCFPackage#getEntries()
 *
 * Returns a read-only view of the cached entry names, listing the ZIP
 * file first if that has not been done yet.
 */
@Override
public List<String> getEntries() throws IOException {
    synchronized (zip) {
        if (allEntries == null) {
            listEntries();
        }
        return Collections.unmodifiableList(allEntries);
    }
}

/* (non-Javadoc)
* @see com.adobe.epubcheck.ocf.OCFPackage#getFileEntries()
*/
@Override
public HashSet<String> getFileEntries() throws IOException {
HashSet<String> entryNames = new HashSet<String>();

for (Enumeration<? extends ZipEntry> entries = zip.entries(); entries.hasMoreElements();) {
ZipEntry entry = (ZipEntry) entries.nextElement();
if (!entry.isDirectory()) {
entryNames.add(entry.getName());
}
}

return entryNames;
public Set<String> getFileEntries() throws IOException {
synchronized (zip) {
if (allEntries==null) listEntries();
}
return Collections.unmodifiableSet(fileEntries);
}


/* (non-Javadoc)
* @see com.adobe.epubcheck.ocf.OCFPackage#getDirectoryEntries()
*/
@Override
public HashSet<String> getDirectoryEntries() throws IOException {
HashSet<String> entryNames = new HashSet<String>();

for (Enumeration<? extends ZipEntry> entries = zip.entries(); entries.hasMoreElements();) {
ZipEntry entry = (ZipEntry) entries.nextElement();
if (entry.isDirectory()) {
entryNames.add(entry.getName());
}
}
return entryNames;
public Set<String> getDirectoryEntries() throws IOException {
synchronized (zip) {
if (allEntries==null) listEntries();
}
return Collections.unmodifiableSet(dirEntries);
}

}
12 changes: 12 additions & 0 deletions src/test/java/com/adobe/epubcheck/api/Epub30CheckTest.java
Expand Up @@ -168,6 +168,18 @@ public void testFilenameContainsSpacesIssue239() {
testValidateDocument("invalid/issue239.epub", 0, 1);
}

/**
 * Regression test for issue 265: validates {@code invalid/issue265.epub}.
 * The expected counts passed to the helper are 1 and 0; judging by the
 * comment below these are presumably (errors, warnings) — TODO confirm
 * against {@code testValidateDocument}.
 */
@Test
public void testDuplicateZipEntriesIssue265() {
// duplicate entries should raise an error
testValidateDocument("invalid/issue265.epub", 1, 0);
}

/**
 * Regression test for issue 265 (NFC variant): validates
 * {@code invalid/issue265b.epub}.
 * The expected counts passed to the helper are 0 and 4, presumably
 * (errors, warnings) — TODO confirm against {@code testValidateDocument}.
 * NOTE(review): the comment below speaks of "a warning" while four are
 * expected; the test resource is binary, so the reason for four cannot be
 * verified from here.
 */
@Test
public void testDuplicateZipEntriesIssue265b() {
// non-unique entry names (after NFC normalization) should raise a warning
testValidateDocument("invalid/issue265b.epub", 0, 4);
}

@Test
public void testIssue262() {
testValidateDocument("valid/issue262.epub", 0, 0);
Expand Down
12 changes: 12 additions & 0 deletions src/test/java/com/adobe/epubcheck/ocf/OCFMockPackage.java
Expand Up @@ -6,6 +6,8 @@
import java.io.InputStream;
import java.net.URL;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;

public class OCFMockPackage extends OCFPackage
{
Expand Down Expand Up @@ -77,4 +79,14 @@ public HashSet<String> getDirectoryEntries() throws IOException
return dirEntries;
}


/**
 * Returns a fresh list holding the contents of {@code mockEntries}
 * followed by the contents of {@code dirEntries}.
 */
@Override
public List<String> getEntries() throws IOException {
    List<String> all = new LinkedList<String>(mockEntries);
    all.addAll(dirEntries);
    return all;
}


}
Binary file added src/test/resources/30/epub/invalid/issue265.epub
Binary file not shown.
Binary file added src/test/resources/30/epub/invalid/issue265b.epub
Binary file not shown.

0 comments on commit 429a4a3

Please sign in to comment.