• Main Page
  • Namespaces
  • Data Structures
  • Files
  • File List

/www/proggenOrg/dedupe/export/trunk/hash/hash.h

00001 //Hash.h
00002 //by Jochen Hebeler, 2010
00003 //Provides a class template computing a 32/64-bit FNV-style hash (XOR first, then multiply by the FNV prime)
00004 
00005 #ifndef ORG_PROGGEN_DEDUPE_HASH_HASH_H_
00006 #define ORG_PROGGEN_DEDUPE_HASH_HASH_H_
00007 
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>

#include <boost/filesystem/fstream.hpp>

#include <fileinfo.h>
#include <searchfiles.h>
00013 
00014 using namespace std;
00015 
00016 
00017 
00018 namespace Dedupe
00019 {
00020         namespace Hash
00021         {
00022         //Bad workaround: long long isn't supportet by all compilers, but no problem yet?
00023         #define FILE_PACKET_SIZE 104857600   //We will always load 100MB of a file(or less if required)
00024 
00025         typedef unsigned long long uint64__;
00026         typedef unsigned long uint32__;
00027 
00028         template <typename T> class Hash
00029         {
00030         private:
00031                 T hash; //our int for storing the hash, should be uint32 or uint64
00032                 T offset;  //The offset base for generating the hash, look up wikipedia for it(fnv-1)
00033                 T magic;  //The magic-number for generating hash, look also wikipedia up for it
00034         public:
00036         Hash()
00037         :hash ( 0 ),
00038          offset( (sizeof(T) > 4)?(14695981039346656037ULL):(2166136261UL) ),
00039          magic( (sizeof(T) > 4)?(1099511628211ULL):(16777619UL) )
00040         {
00041 
00042         }
00043 
00047         Hash(void *data, uint64__ length)
00048         :hash ( 0 ),
00049          offset( (sizeof(T) > 4)?(14695981039346656037ULL):(2166136261UL) ),
00050          magic( (sizeof(T) > 4)?(1099511628211ULL):(16777619UL) )
00051 {
00052         hash = offset;
00053         uint64__ i = 0;
00054         char *data2 = static_cast<char*>(data);
00055         T *longdata = static_cast<T*>(data);
00056         if((length % sizeof(T)) != 0) //If the length isn't dividable by the size of the hashvar, hash the single bytes until it is easy hashable
00057         {
00058                 for(; i < (length % sizeof(T)); i++)
00059                 {
00060                         hash = (hash ^ *(data2++)) * magic;    //hashen
00061                 }
00062         }
00063         longdata += (length % sizeof(T));
00064         for(; i < length; i+=sizeof(T))
00065         {
00066                 hash = (hash ^ *(longdata++)) * magic;  //Hash the rest
00067         }
00068 }
00069 
00074         void GenerateHash( void* data, uint64__ length )
00075 {
00076         hash = offset;
00077         uint64__ i = 0;
00078         char *data2 = static_cast<char*>(data);
00079         T *longdata = static_cast<T>(data);
00080         if((length % sizeof(T)) != 0)
00081         {
00082                 for(; i < (length % sizeof(T)); i++)
00083                 {
00084                         hash = (hash ^ *data2++) * magic;
00085                 }
00086         }
00087         longdata += (length % sizeof(T));
00088         for(; i < length; i += sizeof(T))
00089         {
00090                 hash = (hash ^ *longdata++) * magic;
00091         }
00092 }
00093 
00097         static T GetHash(void* data, uint64__ length, Hash &object)
00098 {
00099         T nHash = object.offset;
00100         uint64__ i = 0;
00101         char *data2 = static_cast<char*>(data);
00102         T *longdata = static_cast<T*>(data);
00103         if((length % sizeof(T)) != 0)
00104         {
00105                 for(; i < (length % sizeof(T)); i++)
00106                 {
00107                         nHash = (nHash ^ *data2++) * object.magic;
00108                 }
00109         }
00110         longdata += (length % sizeof(T));
00111         for(; i < length; i += sizeof(T))
00112         {
00113                 nHash = (nHash ^ *longdata++) * object.magic;
00114         }
00115         return nHash;
00116 }
00117 
00120         void HashFileInfo( Dedupe::FileInfo &info)
00121 {
00122   boost::filesystem::ifstream *file = new boost::filesystem::ifstream();
00123         char *data = new char[FILE_PACKET_SIZE];
00124         unsigned long long int length = 0;
00125         file->open(info.GetPath(), ifstream::in);
00126         file->seekg(0, ios::end);
00127         length = file->tellg();
00128         file->seekg(0, ios::beg);
00129 
00130         file->read(data, (FILE_PACKET_SIZE > length)?(length):(FILE_PACKET_SIZE));
00131         Hash<T> hash(data, (FILE_PACKET_SIZE > length)?(length):(FILE_PACKET_SIZE));
00132         length -= (104857600 > length)?(length):(FILE_PACKET_SIZE);
00133         while(file->good() && length > 0)
00134         {
00135           file->read(data, (FILE_PACKET_SIZE > length)?(length):(FILE_PACKET_SIZE));
00136                 hash.AddToHash(data, (FILE_PACKET_SIZE > length)?(length):(FILE_PACKET_SIZE));
00137                 length -= FILE_PACKET_SIZE;
00138         }
00139         file->close();
00140         info.SetHash( hash.GetHash() );
00141 
00142 
00143         delete file;
00144         delete data;
00145 }
00146 
00147 
00150         void HashMultiplyFileInfo(Dedupe::FileStream &stream)
00151 {
00152         std::for_each( stream.begin(), stream.end(), HashFileInfo );
00153 }
00154 
00158         void AddToHash( void* data, uint64__ length )
00159 {
00160         if(hash == 0)
00161         {
00162                 return;
00163         }
00164         uint64__ i = 0;
00165         char *data2 = static_cast<char*>(data);
00166         T *longdata = static_cast<T*>(data);
00167         if((length % sizeof(T)) != 0)
00168         {
00169                 for(; i < (length % sizeof(T)); i++)
00170                 {
00171                         hash = (hash ^ *data2++) * magic;
00172                 }
00173         }
00174         longdata += (length % sizeof(T));
00175         for(; i < length; i += sizeof(T))
00176         {
00177                 hash = (hash ^ *longdata++) * magic;
00178         }
00179 }
00180 
00183         T GetHash()
00184         {
00185                 return hash;
00186         }
00187 
00188         bool operator== (Hash<T> &h1 ) const
00189         {
00190                 if(this->hash == h1.GetHash())
00191                 {
00192                         return true;
00193                 }
00194                 return false;
00195         }
00196 
00197         bool operator!= (Hash<T> &h1 )
00198         {
00199                 if(this->hash != h1.GetHash())
00200                 {
00201                         return true;
00202                 }
00203                 return false;
00204         }
00205 
00209         void Display();
00210         };
00211 
00212         typedef Hash<uint64__> Hash64;
00213         typedef Hash<uint32__> Hash32;
00214 
00215         }
00216 }
00217 
00218 #endif

Generated on Mon Mar 11 2013 12:04:52 for Dedupe by  doxygen 1.7.1