From e2547f61ca865d2e7ea4583774b1cd996313d07b Mon Sep 17 00:00:00 2001 From: Justin Lewis Salmon Date: Tue, 28 May 2013 15:22:24 +0100 Subject: [PATCH 1/4] [xrdcopy] Implement recursive remote-to-local copy --- src/XrdCl/XrdClClassicCopyJob.cc | 63 +++++++++++++- src/XrdCl/XrdClCopy.cc | 137 ++++++++++++++++++++++++++++++- src/XrdCl/XrdClCopyProcess.hh | 4 +- 3 files changed, 200 insertions(+), 4 deletions(-) diff --git a/src/XrdCl/XrdClClassicCopyJob.cc b/src/XrdCl/XrdClClassicCopyJob.cc index 93172b839fa..b4a14b9fbf0 100644 --- a/src/XrdCl/XrdClClassicCopyJob.cc +++ b/src/XrdCl/XrdClClassicCopyJob.cc @@ -201,7 +201,7 @@ namespace //! Constructor //------------------------------------------------------------------------ Destination(): - pPosc( false ), pForce( false ), pCoerce( false ) {} + pPosc( false ), pForce( false ), pCoerce( false ), pMakeDir( false ) {} //------------------------------------------------------------------------ //! Destructor @@ -251,10 +251,19 @@ namespace pCoerce = coerce; } + //------------------------------------------------------------------------ + //! 
Set makedir + //------------------------------------------------------------------------ + void SetMakeDir( bool makedir ) + { + pMakeDir = makedir; + } + protected: bool pPosc; bool pForce; bool pCoerce; + bool pMakeDir; }; //---------------------------------------------------------------------------- @@ -681,6 +690,17 @@ namespace using namespace XrdCl; Log *log = DefaultEnv::GetLog(); + //---------------------------------------------------------------------- + // Make the directory path if necessary + //---------------------------------------------------------------------- + if( pMakeDir ) + { + std::string dirpath = pPath.substr(0, pPath.find_last_of("/") == std::string::npos ? 0 : pPath.find_last_of("/")); /* a bare file name has no directory part to create */ + XRootDStatus st = MkPath( dirpath ); + if( !st.IsOK() ) + return st; + } + //---------------------------------------------------------------------- // Open the file for reading and get it's size //---------------------------------------------------------------------- @@ -740,6 +760,46 @@ namespace return XrdCl::Utils::GetLocalCheckSum( checkSum, checkSumType, pPath ); } + //------------------------------------------------------------------------ + //! 
Create a directory path + //------------------------------------------------------------------------ + virtual XrdCl::XRootDStatus MkPath( std::string &path ) + { + using namespace XrdCl; + Log *log = DefaultEnv::GetLog(); + struct stat st; + + for( std::string::iterator iter = path.begin(); iter != path.end(); ) + { + std::string::iterator newIter = std::find( iter, path.end(), '/' ); + std::string newPath = std::string( path.begin(), newIter ); + + if( newPath.empty() ) { /* leading '/' of an absolute path: nothing to stat or create */ } else if( stat( newPath.c_str(), &st ) != 0 ) + { + if( mkdir( newPath.c_str(), 0755 ) != 0 && errno != EEXIST ) + { + log->Debug( UtilityMsg, "Cannot create directory %s: %s", + newPath.c_str(), strerror( errno ) ); + return XRootDStatus( stError, errOSError, errno ); + } + } + else if( !S_ISDIR( st.st_mode ) ) + { + errno = ENOTDIR; + log->Debug( UtilityMsg, "Path %s not a directory: %s", + newPath.c_str(), strerror( errno ) ); + return XRootDStatus( stError, errOSError, errno ); + } + else + log->Dump( UtilityMsg, "Path %s already exists", newPath.c_str() ); + + iter = newIter; + if( newIter != path.end() ) + ++iter; + } + return 0; + } + private: std::string pPath; int pFD; @@ -953,6 +1013,7 @@ namespace XrdCl dest->SetForce( pJob->force ); dest->SetPOSC( pJob->posc ); dest->SetCoerce( pJob->coerce ); + dest->SetMakeDir( pJob->makedir ); st = dest->Initialize(); if( !st.IsOK() ) return st; diff --git a/src/XrdCl/XrdClCopy.cc b/src/XrdCl/XrdClCopy.cc index 00ee843d3bd..d0988d1ab5a 100644 --- a/src/XrdCl/XrdClCopy.cc +++ b/src/XrdCl/XrdClCopy.cc @@ -234,6 +234,105 @@ void AdjustFileInfo( XrdCpFile *file ) } }; +//------------------------------------------------------------------------------ +// Get a list of files and a list of directories inside a remote directory +//------------------------------------------------------------------------------ +XrdCl::XRootDStatus GetDirList( XrdCl::FileSystem *fs, + XrdCl::URL url, + std::vector *&files, + std::vector *&directories ) +{ + using namespace XrdCl; + DirectoryList *list; + 
XRootDStatus status; + Log *log = DefaultEnv::GetLog(); + + status = fs->DirList( url.GetPath(), DirListFlags::Stat, list ); + if( !status.IsOK() ) + { + log->Error( AppMsg, "Error listing directory: %s", + status.GetErrorMessage().c_str()); + return status; + } + + for ( DirectoryList::Iterator it = list->Begin(); it != list->End(); ++it ) + { + if ( (*it)->GetStatInfo()->TestFlags( StatInfo::IsDir ) ) + { + std::string directory = (*it)->GetName(); + directories->push_back( directory ); + } + else + { + std::string file = (*it)->GetName(); + files->push_back( file ); + } + } + + return XRootDStatus(); +} + +//------------------------------------------------------------------------------ +// Recursively index all files and directories inside a remote directory +//------------------------------------------------------------------------------ +XrdCpFile* IndexRemote( XrdCl::FileSystem *fs, + std::string basePath, + uint16_t dirOffset ) +{ + using namespace XrdCl; + + XrdCpFile *start = new XrdCpFile(); + XrdCpFile *end = start; + XrdCpFile *current; + URL source( basePath ); + int badUrl; + + std::vector *files = new std::vector(); + std::vector *directories = new std::vector(); + + Log *log = DefaultEnv::GetLog(); + log->Debug( AppMsg, "Indexing %s", basePath.c_str() ); + + XRootDStatus status = GetDirList( fs, source, files, directories ); + if( !status.IsOK() ) + { + log->Info( AppMsg, "Failed to get directory listing for %s: %s", + source.GetURL().c_str(), + status.GetErrorMessage().c_str() ); + } + + std::vector::iterator it; + for( it = files->begin(); it != files->end(); ++it ) + { + std::string file = basePath + "/" + (*it); + log->Dump( AppMsg, "Found file %s", file.c_str() ); + + current = new XrdCpFile( file.c_str(), badUrl ); + if( badUrl ) + { + log->Error( AppMsg, "Bad URL: %s", current->Path ); + return 0; + } + + current->Doff = dirOffset; + end->Next = current; + end = current; + } + + for( it = directories->begin(); it != directories->end(); ++it ) 
+ { + std::string directory = basePath + "/" + (*it); + log->Dump( AppMsg, "Found directory %s", directory.c_str() ); + + end->Next = IndexRemote( fs, directory, dirOffset ); + while( end->Next ) end = end->Next; + } + + delete files; + delete directories; + return start->Next; +} + //------------------------------------------------------------------------------ // Clean up the copy job descriptors //------------------------------------------------------------------------------ @@ -255,7 +354,7 @@ int main( int argc, char **argv ) // Configure the copy command, if it returns then everything went well, ugly //---------------------------------------------------------------------------- XrdCpConfig config( argv[0] ); - config.Config( argc, argv, 0 ); + config.Config( argc, argv, XrdCpConfig::optRmtRec ); if( !AllOptionsSupported( &config ) ) return 254; ProcessCommandLineEnv( &config ); @@ -281,12 +380,14 @@ int main( int argc, char **argv ) bool thirdPartyFallBack = true; bool force = false; bool coerce = false; + bool makedir = false; if( config.Want( XrdCpConfig::DoPosc ) ) posc = true; if( config.Want( XrdCpConfig::DoForce ) ) force = true; if( config.Want( XrdCpConfig::DoCoerce ) ) coerce = true; if( config.Want( XrdCpConfig::DoTpc ) ) thirdParty = true; if( config.Want( XrdCpConfig::DoTpcOnly ) ) thirdPartyFallBack = false; + if( config.Want( XrdCpConfig::DoRecurse ) ) makedir = true; std::string checkSumType; std::string checkSumPreset; @@ -362,9 +463,39 @@ int main( int argc, char **argv ) } //---------------------------------------------------------------------------- - // Process the sources + // If we're doing remote recursive copy, chain all the files (if it's a + // directory) //---------------------------------------------------------------------------- + if( config.Want( XrdCpConfig::DoRecurse ) && config.srcFile->Protocol == XrdCpFile::isXroot ) + { + URL source( config.srcFile->Path ); + FileSystem *fs = new FileSystem( source ); + StatInfo *statInfo = 0; + + XRootDStatus 
st = fs->Stat( source.GetPath(), statInfo ); + if( st.IsOK() && statInfo->TestFlags( StatInfo::IsDir ) ) + { + //------------------------------------------------------------------------ + // Recursively index the remote directory + //------------------------------------------------------------------------ + delete config.srcFile; + config.srcFile = IndexRemote( fs, source.GetURL(), + source.GetURL().size() ); + if ( !config.srcFile ) + { + std::cerr << "Error indexing remote directory."; + return 255; + } + } + + delete fs; + delete statInfo; + } + XrdCpFile *sourceFile = config.srcFile; + //---------------------------------------------------------------------------- + // Process the sources + //---------------------------------------------------------------------------- while( sourceFile ) { AdjustFileInfo( sourceFile ); @@ -400,6 +531,7 @@ int main( int argc, char **argv ) job->force = force; job->posc = posc; job->coerce = coerce; + job->makedir = makedir; job->thirdParty = thirdParty; job->thirdPartyFallBack = thirdPartyFallBack; job->checkSumType = checkSumType; @@ -434,3 +566,4 @@ int main( int argc, char **argv ) CleanUpJobs( jobs ); return 0; } + diff --git a/src/XrdCl/XrdClCopyProcess.hh b/src/XrdCl/XrdClCopyProcess.hh index 408de2d1abf..fc3ba49425e 100644 --- a/src/XrdCl/XrdClCopyProcess.hh +++ b/src/XrdCl/XrdClCopyProcess.hh @@ -71,7 +71,7 @@ namespace XrdCl { JobDescriptor(): sourceLimit(1), force(false), posc(false), coerce(false), thirdParty(false), checkSumPrint(false), chunkSize( 4194304 ), - parallelChunks(8) + makedir(false), parallelChunks(8) {} URL source; //!< [in] original source URL @@ -83,6 +83,8 @@ namespace XrdCl //!< close bool coerce; //!< [in] ignore file usage rules, //!< ie. 
apply Force flag to Open + bool makedir; //!< [in] create directory path to + //!< file if it doesn't exist bool thirdParty; //!< [in] do third party copy if //!< possible bool thirdPartyFallBack; //!< [in] fall back to classic copy From 830409e0f5bcb0dc49022a730ddbeedcd3f69977 Mon Sep 17 00:00:00 2001 From: Justin Lewis Salmon Date: Tue, 28 May 2013 15:54:57 +0100 Subject: [PATCH 2/4] [XrdCl] Use ToStr() instead of GetErrorMessage() when dirlist fails --- src/XrdCl/XrdClCopy.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/XrdCl/XrdClCopy.cc b/src/XrdCl/XrdClCopy.cc index d0988d1ab5a..14ddd8aacbe 100644 --- a/src/XrdCl/XrdClCopy.cc +++ b/src/XrdCl/XrdClCopy.cc @@ -251,7 +251,7 @@ XrdCl::XRootDStatus GetDirList( XrdCl::FileSystem *fs, if( !status.IsOK() ) { log->Error( AppMsg, "Error listing directory: %s", - status.GetErrorMessage().c_str()); + status.ToStr().c_str()); return status; } From b4dd02bea9d0772ef1b7788b6bc143faa3dfc093 Mon Sep 17 00:00:00 2001 From: Justin Lewis Salmon Date: Tue, 28 May 2013 15:55:43 +0100 Subject: [PATCH 3/4] [XrdCl] Return XRootDStatus object instead of 0 in MakePath() --- src/XrdCl/XrdClClassicCopyJob.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/XrdCl/XrdClClassicCopyJob.cc b/src/XrdCl/XrdClClassicCopyJob.cc index b4a14b9fbf0..5ab4b0273c0 100644 --- a/src/XrdCl/XrdClClassicCopyJob.cc +++ b/src/XrdCl/XrdClClassicCopyJob.cc @@ -797,7 +797,7 @@ namespace if( newIter != path.end() ) ++iter; } - return 0; + return XRootDStatus(); } private: From 4aa75357be90494625235a53bc4b4415fa90342a Mon Sep 17 00:00:00 2001 From: Justin Lewis Salmon Date: Tue, 28 May 2013 16:00:40 +0100 Subject: [PATCH 4/4] [XrdCl] Pass URL by const reference rather than by value --- src/XrdCl/XrdClCopy.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/XrdCl/XrdClCopy.cc b/src/XrdCl/XrdClCopy.cc index 14ddd8aacbe..7ace3802cd1 100644 --- a/src/XrdCl/XrdClCopy.cc +++ b/src/XrdCl/XrdClCopy.cc @@ 
-238,7 +238,7 @@ void AdjustFileInfo( XrdCpFile *file ) // Get a list of files and a list of directories inside a remote directory //------------------------------------------------------------------------------ XrdCl::XRootDStatus GetDirList( XrdCl::FileSystem *fs, - XrdCl::URL url, + const XrdCl::URL &url, std::vector *&files, std::vector *&directories ) {