00001 #include "kernel.h"
00002
00003 Dedupe::Core::Kernel::Kernel( Dedupe::FilePath DatabaseLocation,
00004 size_t Threads,
00005 std::ostream &Messages )
00006 : Database( DatabaseLocation ),
00007 Threadcount( Threads ),
00008 Searcher( ),
00009 Hasher( ),
00010 MessageOut( Messages )
00011 {
00012
00013 }
00014
00015 void Dedupe::Core::Kernel::Threadcontroller( std::function<void(Dedupe::FileInfo&)> Func, Dedupe::FileStream &stream )
00016 {
00017
00018 if( Threadcount == 0u ) std::for_each( stream.begin(), stream.end(), Func );
00019 else MessageOut << "NOTE:Multithreading not implemented yet" << std::endl;
00020 }
00021
00022 void Dedupe::Core::Kernel::TrackFiles( Dedupe::FilePaths const &Paths, bool recursiv )
00023 {
00024 boost::function<void ( Dedupe::FileInfo& )>AddFunction;
00025
00026 AddFunction = boost::bind( &Dedupe::Core::Kernel::AddFile, this, _1 );
00027
00028 FileAndDirectoryHandlerToFunction( Paths, recursiv, AddFunction );
00029 }
00030
00031 void Dedupe::Core::Kernel::AddFile( Dedupe::FileInfo &Info )
00032 {
00033 if( Database.AlreadyInDatabase( Info ) )
00034 {
00035 MessageOut << "NOTE:" << Info.GetPath() <<": File already in database\n";
00036 return;
00037 }
00038
00039 Hasher.HashFileInfo( Info );
00040
00041 Database.AddFile( Info );
00042 }
00043
00044 void Dedupe::Core::Kernel::ListDatabase()
00045 {
00046 Dedupe::FileStream Files( Database.GetFiles());
00047 for( Dedupe::FileStream::const_iterator it = Files.begin();
00048 it != Files.end();
00049 ++it )
00050 {
00051 MessageOut << "LIST:"
00052 << it->GetPath() << ":"
00053 << it->GetSize() << ":"
00054 << it->GetDateChanged() << ":"
00055 << it->GetType() << ":"
00056 << it->GetHash() << "\n";
00057 }
00058
00059 }
00060
00061 Dedupe::FileStream Dedupe::Core::Kernel::GetDatabase()
00062 {
00063 return Database.GetFiles();
00064 }
00065
00066 void Dedupe::Core::Kernel::UntrackFiles( Dedupe::FilePaths const &Paths, bool recursiv )
00067 {
00068 boost::function<void ( Dedupe::FileInfo& )>DelFunction;
00069
00070 DelFunction = boost::bind( &Dedupe::Dataholding::Dataholding::DelFile, &Database, _1 );
00071
00072 FileAndDirectoryHandlerToFunction( Paths, recursiv, DelFunction );
00073 }
00074
00075
00076 Dedupe::FileStream
00077 Dedupe::Core::Kernel::FileAndDirectoryHandler( Dedupe::FilePaths const &Paths,
00078 bool recursiv)
00079 {
00080 Dedupe::FileInfo *current;
00081 Dedupe::FileStream AllFilesFromUserinput;
00082
00083
00084 for( Dedupe::FilePaths::const_iterator it = Paths.begin(); it != Paths.end(); ++it)
00085 {
00086 current = new Dedupe::FileInfo( *it );
00087
00088
00089 if( current->GetStatus() != Dedupe::FileInfo::FileOK )
00090 {
00091 MessageOut << "ERROR:" << current->GetErrorMessage() << std::endl;
00092 continue;
00093 }
00094
00095 if( current->GetType() == Dedupe::FileInfo::TFile )
00096 {
00097 AllFilesFromUserinput.push_back( *current );
00098 }
00099
00100 else if( current->GetType() == Dedupe::FileInfo::TDirectory )
00101 {
00102 if( !recursiv ) Searcher.Search( *current );
00103 else Searcher.SearchRecursive( *current );
00104
00105 std::for_each(
00106 Searcher.GetFiles().begin(),
00107 Searcher.GetFiles().end(),
00108 [&AllFilesFromUserinput]
00109 (Dedupe::FileInfo File)
00110 {AllFilesFromUserinput.push_back( File );});
00111
00112
00113 }
00114 delete current;
00115 }
00116 return AllFilesFromUserinput;
00117 }
00118
00119
00120 void Dedupe::Core::Kernel::FileAndDirectoryHandlerToFunction( Dedupe::FilePaths const &Paths,
00121 bool recursiv,
00122 boost::function<void ( Dedupe::FileInfo& )>Function )
00123 {
00124 Dedupe::FileStream TmpStream = FileAndDirectoryHandler( Paths, recursiv );
00125 Threadcontroller( Function, TmpStream );
00126 }
00127
00128 Dedupe::Core::Duplicates Dedupe::Core::Kernel::FindHashDuplicates()
00129 {
00130
00131 return FindDuplicates( Database.GetFiles(),
00132 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00133 { return lhs.GetHash() > rhs.GetHash(); },
00134 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00135 { return lhs.GetHash() == rhs.GetHash(); });
00136 }
00137
00138 Dedupe::Core::Duplicates Dedupe::Core::Kernel::FindHashDuplicatesExtern(
00139 Dedupe::FilePaths Paths,
00140 bool recursiv)
00141 {
00142 Dedupe::FileStream AllFilesFromUser;
00143
00144 AllFilesFromUser = FileAndDirectoryHandler( Paths, recursiv );
00145
00146 std::function<void(Dedupe::FileInfo &Info)>HashFunc =
00147 std::bind( &Dedupe::Hash::Hash64::HashFileInfo, &Hasher, std::placeholders::_1 );
00148
00149 Threadcontroller( HashFunc, AllFilesFromUser );
00150
00151
00152 return FindDuplicates( AllFilesFromUser,
00153 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00154 { return lhs.GetHash() > rhs.GetHash(); },
00155 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00156 { return lhs.GetHash() == rhs.GetHash(); });
00157 }
00158
00159 Dedupe::Core::Duplicates
00160 Dedupe::Core::Kernel::FindFilenameDuplicates()
00161 {
00162
00163 return FindDuplicates( Database.GetFiles(),
00164 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00165 { return lhs.GetPath().filename() > rhs.GetPath().filename();},
00166
00167 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00168 { return lhs.GetPath().filename() == rhs.GetPath().filename();});
00169 }
00170
00171
00172 Dedupe::Core::Duplicates
00173 Dedupe::Core::Kernel::FindFilenameDuplicatesExtern( Dedupe::FilePaths Paths,
00174 bool recursiv )
00175 {
00176
00177 return FindExternDuplicates( Paths, recursiv,
00178 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00179 { return lhs.GetPath().filename() > rhs.GetPath().filename();},
00180
00181 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00182 { return lhs.GetPath().filename() == rhs.GetPath().filename();});
00183 }
00184
00185 Dedupe::Core::Duplicates
00186 Dedupe::Core::Kernel::FindFilesizeDuplicates()
00187 {
00188
00189 return FindDuplicates( Database.GetFiles(),
00190 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00191 { return lhs.GetSize() > rhs.GetSize();},
00192
00193 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00194 { return lhs.GetSize() == rhs.GetSize();});
00195 }
00196
00197
00198 Dedupe::Core::Duplicates
00199 Dedupe::Core::Kernel::FindFilesizeDuplicatesExtern( Dedupe::FilePaths Paths,
00200 bool recursiv )
00201 {
00202
00203 return FindExternDuplicates( Paths, recursiv,
00204 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00205 { return lhs.GetSize() > rhs.GetSize();},
00206
00207 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00208 { return lhs.GetSize() == rhs.GetSize();});
00209 }
00210
00211 Dedupe::Core::Duplicates
00212 Dedupe::Core::Kernel::FindFiledateDuplicates()
00213 {
00214
00215 return FindDuplicates( Database.GetFiles(),
00216 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00217 { return lhs.GetDateChanged() > rhs.GetDateChanged();},
00218
00219 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00220 { return lhs.GetDateChanged() == rhs.GetDateChanged();});
00221 }
00222
00223
00224 Dedupe::Core::Duplicates
00225 Dedupe::Core::Kernel::FindFiledateDuplicatesExtern( Dedupe::FilePaths Paths,
00226 bool recursiv )
00227 {
00228
00229 return FindExternDuplicates( Paths, recursiv,
00230 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00231 { return lhs.GetDateChanged() > rhs.GetDateChanged();},
00232
00233 [](Dedupe::FileInfo lhs, Dedupe::FileInfo rhs )
00234 { return lhs.GetDateChanged() == rhs.GetDateChanged();});
00235 }
00236
00237 Dedupe::Core::Duplicates
00238 Dedupe::Core::Kernel::FindDuplicates( Dedupe::FileStream CurrentFiles,
00239 std::function<bool(Dedupe::FileInfo, Dedupe::FileInfo)> SortFunc,
00240 std::function<bool(Dedupe::FileInfo, Dedupe::FileInfo)> EqualFunc)
00241 {
00242 Duplicates Dups;
00243 HandleDuplicates Handle = 0;
00244
00245
00246 if( CurrentFiles.size() <= 1u ) return Dups;
00247
00248
00249 std::sort(CurrentFiles.begin(),
00250 CurrentFiles.end(),
00251 SortFunc);
00252
00253
00254
00255 Dedupe::FileStream::iterator WalkerBegin( CurrentFiles.begin() ),
00256 WalkerEnd( WalkerBegin );
00257
00258 Dedupe::Core::DuplicateGroup DupGroup;
00259
00260
00261 while( WalkerBegin != CurrentFiles.end() )
00262 {
00263
00264 WalkerEnd = WalkerBegin + 1;
00265
00266
00267
00268
00269 while( WalkerEnd != CurrentFiles.end()
00270 && EqualFunc( *WalkerBegin, *WalkerEnd ))
00271 {
00272 ++WalkerEnd;
00273 }
00274
00275
00276 if( (WalkerEnd - WalkerBegin) <= 1u )
00277 {
00278 ++WalkerBegin;
00279
00280 continue;
00281 }
00282
00283 while( WalkerBegin != WalkerEnd )
00284 {
00285 Handle = Dedupe::Core::HandleDuplicates::Empty;
00286 DupGroup.push_back( *WalkerBegin);
00287 ++WalkerBegin;
00288 }
00289 Dups.push_back( DupGroup );
00290
00291 DupGroup.clear();
00292
00293 WalkerBegin = WalkerEnd;
00294 }
00295
00296 return Dups;
00297 }
00298
00299 Dedupe::Core::Duplicates
00300 Dedupe::Core::Kernel::FindExternDuplicates(
00301 Dedupe::FilePaths Paths,
00302 bool recursiv,
00303 std::function<bool(Dedupe::FileInfo, Dedupe::FileInfo)>SortFunc,
00304 std::function<bool(Dedupe::FileInfo, Dedupe::FileInfo)> EqualFunc)
00305 {
00306
00307
00308 return FindDuplicates( FileAndDirectoryHandler(Paths, recursiv),
00309 SortFunc,
00310 EqualFunc );
00311 }
00312
00313 void Dedupe::Core::Kernel::ProcessDuplicates( FilesToProcess Dups )
00314 {
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345 }
00346
00347 void Dedupe::Core::Kernel::AutoUpdateDatabase()
00348 {
00349 MessageOut << "Starting database update...\n";
00350
00351 Dedupe::FileStream FilesFromDatabase = Database.GetFiles();
00352 Dedupe::FileStream FilesToCheck;
00353
00354
00355
00356 std::for_each( FilesFromDatabase.begin(),
00357 FilesFromDatabase.end(),
00358 [ &FilesToCheck ]( Dedupe::FileInfo Current )
00359 { FilesToCheck.push_back( Dedupe::FileInfo( Current.GetPath())); });
00360
00361 size_t i = 0;
00362 std::for_each( FilesToCheck.begin(),
00363 FilesToCheck.end(),
00364 [ &FilesToCheck, FilesFromDatabase,this,&i ] (Dedupe::FileInfo Current )
00365 {
00366
00367
00368 if( Current.GetStatus() != Dedupe::FileInfo::FileOK )
00369 {
00370 MessageOut << "Deleting file from database: "
00371 << Current.GetPath() << std::endl;
00372 Database.DelFile( Current );
00373 }
00374 else
00375 {
00376
00377
00378 if( !EqualNoHash( Current, FilesFromDatabase[ i ] ))
00379 {
00380 MessageOut << "Updating file in database: "
00381 << Current.GetPath() << std::endl;
00382 Hasher.HashFileInfo( Current );
00383 Database.UpdateFile( Current );
00384 }
00385 }
00386 ++i;
00387 });
00388
00389 MessageOut << "Databaseupdate finished.\n";
00390 }