Permalink
Browse files

add missing fetch script for the face dataset

  • Loading branch information...
1 parent 7703b95 commit c75b74b60a2fa99170ca68e840b5af91c5d7887f @ogrisel ogrisel committed Mar 7, 2011
Showing with 29 additions and 0 deletions.
  1. +2 −0 .gitignore
  2. +27 −0 data/labeled_faces_wild/fetch_data.py
View
@@ -13,3 +13,5 @@ data/movie_reviews/poldata.README.2.0
data/languages/paragraphs
data/languages/short_paragraphs
data/languages/html
+
+data/labeled_faces_wild/lfw_preprocessed/
@@ -0,0 +1,27 @@
+"""Simple script to fetch a numpy version of the LFW data
+
+Original dataset and credits available at:
+
+ http://vis-www.cs.umass.edu/lfw/
+
+"""
+import os
+import urllib2
+
+URL = "https://downloads.sourceforge.net/project/scikit-learn/data/lfw_preprocessed.tar.gz"
+ARCHIVE_NAME = "lfw_preprocessed.tar.gz"
+FOLDER_NAME = "lfw_preprocessed"
+
+if not os.path.exists(FOLDER_NAME):
+ if not os.path.exists(ARCHIVE_NAME):
+ print "Downloading data, please Wait (58.8MB)..."
+ print URL
+ opener = urllib2.urlopen(URL)
+ open(ARCHIVE_NAME, 'wb').write(opener.read())
+ print
+
+ import tarfile
+ print "Decompressiong the archive: " + ARCHIVE_NAME
+ tarfile.open(ARCHIVE_NAME, "r:gz").extractall()
+ os.remove(ARCHIVE_NAME)
+

0 comments on commit c75b74b

Please sign in to comment.