Skip to content

Commit

Permalink
Merge pull request #378 from alja/open-error-checks
Browse files Browse the repository at this point in the history
pfc-V2: Additional file open checks and error messages
  • Loading branch information
abh3 committed Jun 16, 2016
2 parents fd0e54d + f39041e commit 3ff7f8f
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 96 deletions.
137 changes: 70 additions & 67 deletions src/XrdFileCache/XrdFileCacheFile.cc
Expand Up @@ -104,33 +104,39 @@ void File::BlockRemovedFromWriteQ(Block* b)

File::~File()
{
m_syncStatusMutex.Lock();
bool needs_sync = ! m_writes_during_sync.empty();
m_syncStatusMutex.UnLock();
if (needs_sync || m_non_flushed_cnt > 0)
if (m_infoFile)
{
Sync();
m_cfi.WriteHeader(m_infoFile);
}
// write statistics in *cinfo file
AppendIOStatToFileInfo();
m_infoFile->Fsync();
m_syncStatusMutex.Lock();

delete m_syncer;
m_syncer = NULL;
bool needs_sync = ! m_writes_during_sync.empty();
m_syncStatusMutex.UnLock();
if (needs_sync || m_non_flushed_cnt > 0)
{
Sync();
m_cfi.WriteHeader(m_infoFile);
}

// write statistics in *cinfo file
AppendIOStatToFileInfo();
m_infoFile->Fsync();

m_syncStatusMutex.UnLock();


m_infoFile->Close();
delete m_infoFile;
m_infoFile = NULL;
}

if (m_output)
{
m_output->Close();
delete m_output;
m_output = NULL;
}
if (m_infoFile)
{
m_infoFile->Close();
delete m_infoFile;
m_infoFile = NULL;
}

delete m_syncer;
m_syncer = NULL;

// print just for curiosity
TRACEF(Debug, "File::~File() ended, prefetch score = " << m_prefetchScore);
Expand Down Expand Up @@ -208,71 +214,68 @@ bool File::Open()
XrdOss &m_output_fs = *Cache::GetInstance().GetOss();
// Create the data file itself.
XrdOucEnv myEnv;
m_output_fs.Create(Cache::GetInstance().RefConfiguration().m_username.c_str(), m_temp_filename.c_str(), 0600, myEnv, XRDOSS_mkpath);
m_output = m_output_fs.newFile(Cache::GetInstance().RefConfiguration().m_username.c_str());
if (m_output)
{
int res = m_output->Open(m_temp_filename.c_str(), O_RDWR, 0600, myEnv);
if (res < 0)
{
TRACEF(Error, "File::Open() can't open data file");
delete m_output;
m_output = 0;
return false;
}
if ( m_output_fs.Create(Cache::GetInstance().RefConfiguration().m_username.c_str(), m_temp_filename.c_str(), 0600, myEnv, XRDOSS_mkpath) !=XrdOssOK)
{
TRACEF(Error, "File::Open() can't create data file, " << strerror(errno));
return false;
}
else

m_output = m_output_fs.newFile(Cache::GetInstance().RefConfiguration().m_username.c_str());
if (m_output->Open(m_temp_filename.c_str(), O_RDWR, 0600, myEnv) != XrdOssOK)
{
TRACEF(Error, "File::Open() can't get file handle");
TRACEF(Error, "File::Open() can't get FD for data file, " << strerror(errno));
delete m_output;
m_output = 0;
return false;
}


// Create the info file
std::string ifn = m_temp_filename + Info::m_infoExtension;

struct stat infoStat;
bool fileExisted = (Cache::GetInstance().GetOss()->Stat(ifn.c_str(), &infoStat) == XrdOssOK);

m_output_fs.Create(Cache::GetInstance().RefConfiguration().m_username.c_str(), ifn.c_str(), 0600, myEnv, XRDOSS_mkpath);
m_infoFile = m_output_fs.newFile(Cache::GetInstance().RefConfiguration().m_username.c_str());
if (m_infoFile)
// AMT: the folowing below is a sanity check, it is not expected to happen. Could be an assert
if (fileExisted && (infoStat.st_size == 0)) {
TRACEF(Error, "File::Open() info file stored zero data file size");
return false;
}

if (m_output_fs.Create(Cache::GetInstance().RefConfiguration().m_username.c_str(), ifn.c_str(), 0600, myEnv, XRDOSS_mkpath) != XrdOssOK)
{
if (fileExisted) assert(infoStat.st_size > 0);
int res = m_infoFile->Open(ifn.c_str(), O_RDWR, 0600, myEnv);
if (res < 0)
{
TRACEF(Error, "File::Open() can't open info file");
delete m_infoFile;
m_infoFile = 0;
return false;
}
else {
if (fileExisted)
{
int res = m_cfi.Read(m_infoFile);
TRACEF(Debug, "Reading existing info file bytes = " << res);
m_downloadCond.Lock();
// this method is called from constructor, no need to lock downloadStaus
bool complete = m_cfi.IsComplete();
if (complete) m_prefetchState = kComplete;
m_downloadCond.UnLock();
}
else {
m_fileSize = m_fileSize;
int ss = (m_fileSize - 1)/m_cfi.GetBufferSize() + 1;
TRACEF(Debug, "Creating new file info, data size = " << m_fileSize << " num blocks = " << ss);
m_cfi.SetBufferSize(Cache::GetInstance().RefConfiguration().m_bufferSize);
m_cfi.SetFileSize(m_fileSize);
m_cfi.WriteHeader(m_infoFile);
m_infoFile->Fsync();
}
}
TRACEF(Error, "File::Open() can't create info file, " << strerror(errno));
return false;
}
else
m_infoFile = m_output_fs.newFile(Cache::GetInstance().RefConfiguration().m_username.c_str());
if (m_infoFile->Open(ifn.c_str(), O_RDWR, 0600, myEnv) != XrdOssOK)
{
// this should be a rare case wher FD can't be created
TRACEF(Error, "File::Open() can't get FD info file, " << strerror(errno));
delete m_infoFile;
m_infoFile = 0;
return false;
}

if (fileExisted)
{
int res = m_cfi.Read(m_infoFile);
TRACEF(Debug, "Reading existing info file bytes = " << res);
m_downloadCond.Lock();
// this method is called from constructor, no need to lock downloadStaus
bool complete = m_cfi.IsComplete();
if (complete) m_prefetchState = kComplete;
m_downloadCond.UnLock();
}
else {
m_fileSize = m_fileSize;
int ss = (m_fileSize - 1)/m_cfi.GetBufferSize() + 1;
TRACEF(Debug, "Creating new file info, data size = " << m_fileSize << " num blocks = " << ss);
m_cfi.SetBufferSize(Cache::GetInstance().RefConfiguration().m_bufferSize);
m_cfi.SetFileSize(m_fileSize);
m_cfi.WriteHeader(m_infoFile);
m_infoFile->Fsync();
}

if (m_prefetchState != kComplete) cache()->RegisterPrefetchFile(this);
return true;
}
Expand Down
3 changes: 3 additions & 0 deletions src/XrdFileCache/XrdFileCacheIOEntireFile.cc
Expand Up @@ -120,6 +120,9 @@ int IOEntireFile::initCachedStat(const char* path)
TRACEIO(Error, "IOEntireFile::initCachedStat failed to read file size from info file");
}
}
else {
TRACEIO(Error, "IOEntireFile::initCachedStat can't open info file " << strerror(errno));
}
infoFile->Close();
delete infoFile;
}
Expand Down
62 changes: 33 additions & 29 deletions src/XrdFileCache/XrdFileCachePurge.cc
Expand Up @@ -86,42 +86,46 @@ void FillFileMapRecurse( XrdOssDF* iOssDF, const std::string& path, FPurgeState&
{
// XXXX MT - shouldn't we also check if it is currently opened?

fh->Open(np.c_str(), O_RDONLY, 0600, env);
Info cinfo(Cache::GetInstance().GetTrace());
time_t accessTime;
cinfo.Read(fh);
if (cinfo.GetLatestDetachTime(accessTime, fh))
{
TRACE(Dump, "FillFileMapRecurse() checking " << buff << " accessTime " << accessTime);
purgeState.checkFile(accessTime, np.c_str(), cinfo.GetNDownloadedBytes());
}
else
{
// cinfo file does not contain any known accesses, use stat.mtime instead.

TRACE(Warning, "FillFileMapRecurse() could not get access time for " << np << ", trying stat");

XrdOss* oss = Cache::GetInstance().GetOss();
struct stat fstat;

if (oss->Stat(np.c_str(), &fstat) == XrdOssOK)
if (fh->Open(np.c_str(), O_RDONLY, 0600, env) == XrdOssOK) {
Info cinfo(Cache::GetInstance().GetTrace());
time_t accessTime;
cinfo.Read(fh);
if (cinfo.GetLatestDetachTime(accessTime, fh))
{
accessTime = fstat.st_mtime;
TRACE(Dump, "FillFileMapRecurse() have access time for " << np << " via stat: " << accessTime);
TRACE(Dump, "FillFileMapRecurse() checking " << buff << " accessTime " << accessTime);
purgeState.checkFile(accessTime, np.c_str(), cinfo.GetNDownloadedBytes());
}
else
{
// This really shouldn't happen ... but if it does remove cinfo and the data file right away.

TRACE(Warning, "FillFileMapRecurse() could not get access time for " << np);
oss->Unlink(np.c_str());
np = np.substr(0, np.size() - strlen(XrdFileCache::Info::m_infoExtension));
oss->Unlink(np.c_str());
// cinfo file does not contain any known accesses, use stat.mtime instead.

TRACE(Warning, "FillFileMapRecurse() could not get access time for " << np << ", trying stat");

XrdOss* oss = Cache::GetInstance().GetOss();
struct stat fstat;

if (oss->Stat(np.c_str(), &fstat) == XrdOssOK)
{
accessTime = fstat.st_mtime;
TRACE(Dump, "FillFileMapRecurse() have access time for " << np << " via stat: " << accessTime);
purgeState.checkFile(accessTime, np.c_str(), cinfo.GetNDownloadedBytes());
}
else
{
// This really shouldn't happen ... but if it does remove cinfo and the data file right away.

TRACE(Warning, "FillFileMapRecurse() could not get access time for " << np);
oss->Unlink(np.c_str());
np = np.substr(0, np.size() - strlen(XrdFileCache::Info::m_infoExtension));
oss->Unlink(np.c_str());
}
}
}
else {
TRACE(Warning, "FillFileMapRecurse() cant open " << np << " " << strerror(errno));
}
}
else if (dh->Opendir(np.c_str(), env) >= 0)
else if (dh->Opendir(np.c_str(), env) == XrdOssOK)
{
FillFileMapRecurse(dh, np, purgeState);
}
Expand Down Expand Up @@ -166,7 +170,7 @@ void Cache::CacheDirCleanup()
{
// make a sorted map of file patch by access time
XrdOssDF* dh = oss->newDir(m_configuration.m_username.c_str());
if (dh->Opendir(m_configuration.m_cache_dir.c_str(), env) >= 0)
if (dh->Opendir(m_configuration.m_cache_dir.c_str(), env) == XrdOssOK)
{
FPurgeState purgeState(bytesToRemove * 5 / 4); // prepare 20% more volume than required

Expand Down

0 comments on commit 3ff7f8f

Please sign in to comment.