00001
00002
00003
00004
00005 #ifndef ORG_PROGGEN_DEDUPE_HASH_HASH_H_
00006 #define ORG_PROGGEN_DEDUPE_HASH_HASH_H_
00007
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>

#include <fileinfo.h>
#include <searchfiles.h>
#include <boost/filesystem/fstream.hpp>
00013
00014 using namespace std;
00015
00016
00017
00018 namespace Dedupe
00019 {
00020 namespace Hash
00021 {
00022
// Maximum number of bytes of a file loaded into memory in one read:
// 100 * 1024 * 1024 = 104857600 bytes (less is read when the file is smaller).
// Being a preprocessor macro, this name is not scoped by the enclosing namespaces.
#define FILE_PACKET_SIZE (100 * 1024 * 1024)

// Integer type used for buffer lengths and as the state type of Hash64.
typedef unsigned long long int uint64__;

// Integer type used as the state type of Hash32.
// NOTE(review): `unsigned long` is 8 bytes on LP64 platforms; Hash selects its
// constants by sizeof(T), so Hash32 would use the 64-bit constants there - confirm
// this is intended before relying on a 32-bit result.
typedef unsigned long int uint32__;
00027
00028 template <typename T> class Hash
00029 {
00030 private:
00031 T hash;
00032 T offset;
00033 T magic;
00034 public:
00036 Hash()
00037 :hash ( 0 ),
00038 offset( (sizeof(T) > 4)?(14695981039346656037ULL):(2166136261UL) ),
00039 magic( (sizeof(T) > 4)?(1099511628211ULL):(16777619UL) )
00040 {
00041
00042 }
00043
00047 Hash(void *data, uint64__ length)
00048 :hash ( 0 ),
00049 offset( (sizeof(T) > 4)?(14695981039346656037ULL):(2166136261UL) ),
00050 magic( (sizeof(T) > 4)?(1099511628211ULL):(16777619UL) )
00051 {
00052 hash = offset;
00053 uint64__ i = 0;
00054 char *data2 = static_cast<char*>(data);
00055 T *longdata = static_cast<T*>(data);
00056 if((length % sizeof(T)) != 0)
00057 {
00058 for(; i < (length % sizeof(T)); i++)
00059 {
00060 hash = (hash ^ *(data2++)) * magic;
00061 }
00062 }
00063 longdata += (length % sizeof(T));
00064 for(; i < length; i+=sizeof(T))
00065 {
00066 hash = (hash ^ *(longdata++)) * magic;
00067 }
00068 }
00069
00074 void GenerateHash( void* data, uint64__ length )
00075 {
00076 hash = offset;
00077 uint64__ i = 0;
00078 char *data2 = static_cast<char*>(data);
00079 T *longdata = static_cast<T>(data);
00080 if((length % sizeof(T)) != 0)
00081 {
00082 for(; i < (length % sizeof(T)); i++)
00083 {
00084 hash = (hash ^ *data2++) * magic;
00085 }
00086 }
00087 longdata += (length % sizeof(T));
00088 for(; i < length; i += sizeof(T))
00089 {
00090 hash = (hash ^ *longdata++) * magic;
00091 }
00092 }
00093
00097 static T GetHash(void* data, uint64__ length, Hash &object)
00098 {
00099 T nHash = object.offset;
00100 uint64__ i = 0;
00101 char *data2 = static_cast<char*>(data);
00102 T *longdata = static_cast<T*>(data);
00103 if((length % sizeof(T)) != 0)
00104 {
00105 for(; i < (length % sizeof(T)); i++)
00106 {
00107 nHash = (nHash ^ *data2++) * object.magic;
00108 }
00109 }
00110 longdata += (length % sizeof(T));
00111 for(; i < length; i += sizeof(T))
00112 {
00113 nHash = (nHash ^ *longdata++) * object.magic;
00114 }
00115 return nHash;
00116 }
00117
00120 void HashFileInfo( Dedupe::FileInfo &info)
00121 {
00122 boost::filesystem::ifstream *file = new boost::filesystem::ifstream();
00123 char *data = new char[FILE_PACKET_SIZE];
00124 unsigned long long int length = 0;
00125 file->open(info.GetPath(), ifstream::in);
00126 file->seekg(0, ios::end);
00127 length = file->tellg();
00128 file->seekg(0, ios::beg);
00129
00130 file->read(data, (FILE_PACKET_SIZE > length)?(length):(FILE_PACKET_SIZE));
00131 Hash<T> hash(data, (FILE_PACKET_SIZE > length)?(length):(FILE_PACKET_SIZE));
00132 length -= (104857600 > length)?(length):(FILE_PACKET_SIZE);
00133 while(file->good() && length > 0)
00134 {
00135 file->read(data, (FILE_PACKET_SIZE > length)?(length):(FILE_PACKET_SIZE));
00136 hash.AddToHash(data, (FILE_PACKET_SIZE > length)?(length):(FILE_PACKET_SIZE));
00137 length -= FILE_PACKET_SIZE;
00138 }
00139 file->close();
00140 info.SetHash( hash.GetHash() );
00141
00142
00143 delete file;
00144 delete data;
00145 }
00146
00147
00150 void HashMultiplyFileInfo(Dedupe::FileStream &stream)
00151 {
00152 std::for_each( stream.begin(), stream.end(), HashFileInfo );
00153 }
00154
00158 void AddToHash( void* data, uint64__ length )
00159 {
00160 if(hash == 0)
00161 {
00162 return;
00163 }
00164 uint64__ i = 0;
00165 char *data2 = static_cast<char*>(data);
00166 T *longdata = static_cast<T*>(data);
00167 if((length % sizeof(T)) != 0)
00168 {
00169 for(; i < (length % sizeof(T)); i++)
00170 {
00171 hash = (hash ^ *data2++) * magic;
00172 }
00173 }
00174 longdata += (length % sizeof(T));
00175 for(; i < length; i += sizeof(T))
00176 {
00177 hash = (hash ^ *longdata++) * magic;
00178 }
00179 }
00180
00183 T GetHash()
00184 {
00185 return hash;
00186 }
00187
00188 bool operator== (Hash<T> &h1 ) const
00189 {
00190 if(this->hash == h1.GetHash())
00191 {
00192 return true;
00193 }
00194 return false;
00195 }
00196
00197 bool operator!= (Hash<T> &h1 )
00198 {
00199 if(this->hash != h1.GetHash())
00200 {
00201 return true;
00202 }
00203 return false;
00204 }
00205
00209 void Display();
00210 };
00211
00212 typedef Hash<uint64__> Hash64;
00213 typedef Hash<uint32__> Hash32;
00214
00215 }
00216 }
00217
00218 #endif