• Main Page
  • Namespaces
  • Data Structures
  • Files
  • File List

/www/proggenOrg/dedupe/export/trunk/kernel/kernel.cpp

00001 #include "kernel.h"
00002 
00003 Dedupe::Core::Kernel::Kernel( Dedupe::FilePath DatabaseLocation,
00004                                                     size_t Threads,
00005                                                       std::ostream &Messages )
00006 : Database( DatabaseLocation ),
00007   Threadcount( Threads ),
00008   Searcher( ),
00009   Hasher( ),
00010   MessageOut( Messages )
00011 {
00012 
00013 }
00014 
00015 void Dedupe::Core::Kernel::Threadcontroller( std::function<void(Dedupe::FileInfo&)> Func, Dedupe::FileStream &stream )
00016 {
00017   //if 0 is given, multithreading is disabled
00018   if( Threadcount == 0u ) std::for_each( stream.begin(), stream.end(), Func );
00019   else MessageOut << "NOTE:Multithreading not implemented yet" << std::endl;
00020 }
00021 
00022 void Dedupe::Core::Kernel::TrackFiles( Dedupe::FilePaths const &Paths, bool recursiv )
00023 {
00024   boost::function<void ( Dedupe::FileInfo& )>AddFunction;
00025 
00026   AddFunction = boost::bind( &Dedupe::Core::Kernel::AddFile, this, _1 );
00027 
00028   FileAndDirectoryHandlerToFunction( Paths, recursiv, AddFunction );
00029 }
00030 
00031 void Dedupe::Core::Kernel::AddFile( Dedupe::FileInfo &Info )
00032 {
00033   if( Database.AlreadyInDatabase( Info ) )
00034   {
00035     MessageOut << "NOTE:" << Info.GetPath() <<": File already in database\n";
00036     return;
00037    }
00038 
00039    Hasher.HashFileInfo( Info );
00040 
00041    Database.AddFile( Info );
00042 }
00043 
00044 void Dedupe::Core::Kernel::ListDatabase()
00045 {
00046   Dedupe::FileStream Files( Database.GetFiles());
00047   for( Dedupe::FileStream::const_iterator it = Files.begin();
00048                                           it != Files.end();
00049                                           ++it )
00050   {
00051     MessageOut << "LIST:"
00052                << it->GetPath() << ":"
00053                << it->GetSize() << ":"
00054                << it->GetDateChanged() << ":"
00055                << it->GetType() << ":"
00056                << it->GetHash() << "\n";
00057   }
00058 
00059 }
00060 
00061 Dedupe::FileStream Dedupe::Core::Kernel::GetDatabase()
00062 {
00063   return Database.GetFiles();
00064 }
00065 
00066 void Dedupe::Core::Kernel::UntrackFiles( Dedupe::FilePaths const &Paths, bool recursiv )
00067 {
00068   boost::function<void ( Dedupe::FileInfo& )>DelFunction;
00069 
00070   DelFunction = boost::bind( &Dedupe::Dataholding::Dataholding::DelFile, &Database, _1 );
00071 
00072   FileAndDirectoryHandlerToFunction( Paths, recursiv, DelFunction );
00073 }
00074 
00075 
00076 Dedupe::FileStream
00077 Dedupe::Core::Kernel::FileAndDirectoryHandler( Dedupe::FilePaths const &Paths,
00078                                             bool recursiv)
00079 {
00080   Dedupe::FileInfo *current;
00081   Dedupe::FileStream AllFilesFromUserinput;
00082 
00083 
00084   for( Dedupe::FilePaths::const_iterator it = Paths.begin(); it != Paths.end(); ++it)
00085   {
00086     current = new Dedupe::FileInfo( *it );
00087 
00088     //Catch all FileInfos where the constructor failed( File not existing )
00089     if( current->GetStatus() != Dedupe::FileInfo::FileOK )
00090     {
00091       MessageOut << "ERROR:" << current->GetErrorMessage() << std::endl;
00092       continue;
00093     }
00094     //Push files to the stream
00095     if( current->GetType() == Dedupe::FileInfo::TFile )
00096     {
00097       AllFilesFromUserinput.push_back( *current );
00098     }
00099     //if a directory is given, decide between recursive and not
00100     else if( current->GetType() == Dedupe::FileInfo::TDirectory )
00101     {
00102       if( !recursiv ) Searcher.Search( *current );
00103       else Searcher.SearchRecursive( *current );
00104       //push founded files to the final Container
00105       std::for_each(
00106          Searcher.GetFiles().begin(),
00107          Searcher.GetFiles().end(),
00108          [&AllFilesFromUserinput]
00109          (Dedupe::FileInfo File)
00110          {AllFilesFromUserinput.push_back( File );});
00111 
00112 
00113     }
00114     delete current;
00115   }
00116   return AllFilesFromUserinput;
00117 }
00118 
00119 
00120 void Dedupe::Core::Kernel::FileAndDirectoryHandlerToFunction( Dedupe::FilePaths const &Paths,
00121                       bool recursiv,
00122                       boost::function<void ( Dedupe::FileInfo& )>Function )
00123 {
00124   Dedupe::FileStream TmpStream = FileAndDirectoryHandler( Paths, recursiv );
00125   Threadcontroller( Function, TmpStream );
00126 }
00127 
00128 Dedupe::Core::Duplicates Dedupe::Core::Kernel::FindHashDuplicates()
00129 {
00130   //we search for duplicate in the database, decide by hash
00131   return FindDuplicates( Database.GetFiles(),
00132                         [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00133                             { return lhs.GetHash() > rhs.GetHash(); },
00134                          [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00135                             { return lhs.GetHash() == rhs.GetHash(); });
00136 }
00137 
00138 Dedupe::Core::Duplicates Dedupe::Core::Kernel::FindHashDuplicatesExtern(
00139                                                     Dedupe::FilePaths Paths,
00140                                                     bool recursiv)
00141 {
00142   Dedupe::FileStream AllFilesFromUser;
00143 
00144   AllFilesFromUser = FileAndDirectoryHandler( Paths, recursiv );
00145 
00146   std::function<void(Dedupe::FileInfo &Info)>HashFunc =
00147   std::bind( &Dedupe::Hash::Hash64::HashFileInfo, &Hasher, std::placeholders::_1 );
00148 
00149   Threadcontroller( HashFunc, AllFilesFromUser );
00150 
00151   //we search for duplicate in the given orders, decide by hash
00152   return FindDuplicates( AllFilesFromUser,
00153                         [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00154                             { return lhs.GetHash() > rhs.GetHash(); },
00155                          [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00156                             { return lhs.GetHash() == rhs.GetHash(); });
00157 }
00158 
00159 Dedupe::Core::Duplicates
00160 Dedupe::Core::Kernel::FindFilenameDuplicates()
00161 {
00162   //we search for duplicates in the database, decide duplicates by filename
00163   return FindDuplicates( Database.GetFiles(),
00164               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00165               { return lhs.GetPath().filename() > rhs.GetPath().filename();},
00166 
00167               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00168               { return lhs.GetPath().filename() == rhs.GetPath().filename();});
00169 }
00170 
00171 
00172 Dedupe::Core::Duplicates
00173 Dedupe::Core::Kernel::FindFilenameDuplicatesExtern( Dedupe::FilePaths Paths,
00174                                                     bool recursiv )
00175 {
00176   //we search for duplicates in the given orders, decide duplicates by filename
00177   return FindExternDuplicates( Paths, recursiv,
00178               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00179               { return lhs.GetPath().filename() > rhs.GetPath().filename();},
00180 
00181               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00182               { return lhs.GetPath().filename() == rhs.GetPath().filename();});
00183 }
00184 
00185 Dedupe::Core::Duplicates
00186 Dedupe::Core::Kernel::FindFilesizeDuplicates()
00187 {
00188   //we search for duplicates in the database, decide duplicates by filesize
00189   return FindDuplicates( Database.GetFiles(),
00190               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00191               { return lhs.GetSize() > rhs.GetSize();},
00192 
00193               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00194               { return lhs.GetSize() == rhs.GetSize();});
00195 }
00196 
00197 
00198 Dedupe::Core::Duplicates
00199 Dedupe::Core::Kernel::FindFilesizeDuplicatesExtern( Dedupe::FilePaths Paths,
00200                                                     bool recursiv )
00201 {
00202   //we search for duplicates in the given orders, decide duplicates by filesize
00203   return FindExternDuplicates( Paths, recursiv,
00204               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00205               { return lhs.GetSize() > rhs.GetSize();},
00206 
00207               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00208               { return lhs.GetSize() == rhs.GetSize();});
00209 }
00210 
00211 Dedupe::Core::Duplicates
00212 Dedupe::Core::Kernel::FindFiledateDuplicates()
00213 {
00214   //we search for duplicates in the database, decide duplicates by filedate
00215   return FindDuplicates( Database.GetFiles(),
00216               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00217               { return lhs.GetDateChanged() > rhs.GetDateChanged();},
00218 
00219               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00220               { return lhs.GetDateChanged() == rhs.GetDateChanged();});
00221 }
00222 
00223 
00224 Dedupe::Core::Duplicates
00225 Dedupe::Core::Kernel::FindFiledateDuplicatesExtern( Dedupe::FilePaths Paths,
00226                                                     bool recursiv )
00227 {
00228   //we search for duplicates in the given orders, decide duplicates by filedate
00229   return FindExternDuplicates( Paths, recursiv,
00230               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00231               { return lhs.GetDateChanged() > rhs.GetDateChanged();},
00232 
00233               [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00234               { return lhs.GetDateChanged() == rhs.GetDateChanged();});
00235 }
00236 
00237 Dedupe::Core::Duplicates
00238 Dedupe::Core::Kernel::FindDuplicates( Dedupe::FileStream CurrentFiles,
00239          std::function<bool(Dedupe::FileInfo, Dedupe::FileInfo)> SortFunc,
00240          std::function<bool(Dedupe::FileInfo, Dedupe::FileInfo)> EqualFunc)
00241 {
00242   Duplicates Dups;
00243   HandleDuplicates Handle = 0;
00244 
00245   //if there is no file or one file, there can't be duplicates
00246   if( CurrentFiles.size() <= 1u ) return Dups;
00247 
00248   //Sort elements by hash value
00249   std::sort(CurrentFiles.begin(),
00250             CurrentFiles.end(),
00251             SortFunc);
00252 
00253 
00254   //We need two iterators to work
00255   Dedupe::FileStream::iterator WalkerBegin( CurrentFiles.begin() ),
00256                                WalkerEnd( WalkerBegin );
00257 
00258   Dedupe::Core::DuplicateGroup DupGroup;
00259 
00260   //this is the main search loop
00261   while( WalkerBegin != CurrentFiles.end() )
00262   {
00263     //Pos End to second element
00264     WalkerEnd = WalkerBegin + 1;
00265 
00266     /*increment End until an other hash is found
00267       also check if the end of the container is reached
00268       to prefent from a endless loop*/
00269     while( WalkerEnd != CurrentFiles.end()
00270            && EqualFunc( *WalkerBegin, *WalkerEnd ))
00271     {
00272       ++WalkerEnd;
00273     }
00274 
00275     //if only one element is found overjump, we search for duplicate groups
00276     if( (WalkerEnd - WalkerBegin) <= 1u )
00277     {
00278       ++WalkerBegin;
00279 
00280       continue;
00281     }
00282 
00283     while( WalkerBegin != WalkerEnd )
00284     {
00285       Handle = Dedupe::Core::HandleDuplicates::Empty;
00286       DupGroup.push_back( *WalkerBegin);
00287       ++WalkerBegin;
00288     }
00289     Dups.push_back( DupGroup );
00290     //Clear for next round in loop
00291     DupGroup.clear();
00292 
00293     WalkerBegin = WalkerEnd;
00294   }
00295 
00296   return Dups;
00297 }
00298 
00299 Dedupe::Core::Duplicates
00300 Dedupe::Core::Kernel::FindExternDuplicates(
00301   Dedupe::FilePaths Paths,
00302   bool recursiv,
00303   std::function<bool(Dedupe::FileInfo, Dedupe::FileInfo)>SortFunc,
00304   std::function<bool(Dedupe::FileInfo, Dedupe::FileInfo)> EqualFunc)
00305 {
00306 
00307   //Give founded files to FindDuplicates function
00308   return FindDuplicates( FileAndDirectoryHandler(Paths, recursiv),
00309                          SortFunc,
00310                          EqualFunc );
00311 }
00312 
00313 void Dedupe::Core::Kernel::ProcessDuplicates( FilesToProcess Dups )
00314 {
00315   /*boost::system::error_code ec;
00316 
00317   for( auto it = Dups.begin(); it != Dups.end(); ++it )
00318   {
00319     for( auto InIt= it->begin(); InIt != it->end(); ++InIt )
00320     {
00321       switch( InIt->second() )
00322       {
00323         case Dedupe::Core::HandleDuplicates::Empty : continue; break;
00324 
00325         case Dedupe::Core::HandleDuplicates::Keep  : continue; break;
00326 
00327         case Dedupe::Core::HandleDuplicates::MarkAsKeep :
00328 
00329         InIt->first.SetKeep( true );
00330         Database.UpdateFile( InIt->first);
00331         break;
00332 
00333         case Dedupe::Core::HandleDuplicates::Delete:
00334 
00335         MessageOut << "Deleting file: " << InIt->first.GetPath() << std::endl;
00336         Database.DelFile( InIt->first );
00337         boost::filesystem::remove( InIt->first.GetPath(),ec);
00338         if( ec != 0 ) MessageOut << ec.message();
00339         break;
00340 
00341         default : MessageOut << "Request can't be handled: Unknown Token\n";
00342       }
00343     }
00344   }*/
00345 }
00346 
00347 void Dedupe::Core::Kernel::AutoUpdateDatabase()
00348 {
00349   MessageOut << "Starting database update...\n";
00350   //Get a copy from the Files in the database
00351   Dedupe::FileStream FilesFromDatabase = Database.GetFiles();
00352   Dedupe::FileStream FilesToCheck;
00353 
00354   /*Use paths from the stored files to build
00355     new FileInfos and store them in FilesToCheck*/
00356   std::for_each( FilesFromDatabase.begin(),
00357                  FilesFromDatabase.end(),
00358         [ &FilesToCheck ]( Dedupe::FileInfo Current )
00359         { FilesToCheck.push_back( Dedupe::FileInfo( Current.GetPath())); });
00360 
00361   size_t i = 0;
00362   std::for_each( FilesToCheck.begin(),
00363                  FilesToCheck.end(),
00364         [ &FilesToCheck, FilesFromDatabase,this,&i ] (Dedupe::FileInfo Current )
00365         {
00366           /*if the filestatus is not FileOK delete file from database else
00367             update the file in database*/
00368           if( Current.GetStatus() != Dedupe::FileInfo::FileOK )
00369           {
00370             MessageOut << "Deleting file from database: "
00371                        << Current.GetPath() << std::endl;
00372             Database.DelFile( Current );
00373           }
00374           else
00375           {
00376             /*Update file only, if something of file has changed,
00377               but no check for hash*/
00378             if( !EqualNoHash( Current, FilesFromDatabase[ i ] ))
00379             {
00380               MessageOut << "Updating file in database: "
00381                          << Current.GetPath() << std::endl;
00382               Hasher.HashFileInfo( Current );
00383               Database.UpdateFile( Current );
00384             }
00385           }
00386           ++i;
00387         });
00388 
00389   MessageOut << "Databaseupdate finished.\n";
00390 }

Generated on Mon Mar 11 2013 12:04:52 for Dedupe by  doxygen 1.7.1