Tidy the CMake project definition, fence the README examples, and clean up
entropy.cpp.

Notes for whoever applies this patch:
  * The read buffer is allocated once and reused for every file, but the byte
    histogram and the byte total stay per-file. Sharing them across files would
    make every file after the first report an entropy mixed with earlier files.
  * Diagnostics name the file currently being processed (argv[i]).
  * The read loop tests the stream state instead of eof(), so a hard read
    error ends the loop instead of spinning forever.
  * README: the fence opened after "Clone the repo:" is closed right after
    `$ cd entropy`; the CMake build commands remain an indented code block.
  * Blank context lines and indentation were reconstructed from a
    whitespace-collapsed copy of this patch. Verify them against the pre-image
    (or regenerate with `git diff`) before applying. The `index` lines of
    README.md and entropy.cpp were dropped because their post-image changed.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a39906a..e52212e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,11 +3,11 @@
 #
 cmake_minimum_required (VERSION 3.8)
 
-project ("entropy")
+project (entropy VERSION 0.0.2)
 
 set(CMAKE_CXX_STANDARD 20)
-set(CMAKE_CXX_STANDARD_REQUIRED True)
-add_executable (entropy "entropy.cpp" )
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+add_executable (entropy entropy.cpp)
 
 if (WIN32)
     set_target_properties(entropy PROPERTIES LINK_FLAGS "/link setargv.obj")
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -15,8 +15,7 @@ this tool, which also supports Linux and macOS.
 
 ## Download
 
-Windows releases are available [here](https://github.com/merces/entropy/releases). In order to run them,
-you need the latest [Microsoft Visual C++ Redistributable](docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist)
+Windows releases are available [here](https://github.com/merces/entropy/releases). In order to run them, you need the latest [Microsoft Visual C++ Redistributable](https://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist)
 installed.
 
 For Linux and macOS see the [Building](#Building) section.
@@ -25,11 +24,14 @@ For Linux and macOS see the [Building](#Building) section.
 
 Calculating the entropy of a single file:
 
+```bash
     $ ./entropy /bin/ls
     5.85 /bin/ls
+```
 
 Shell expansion is supported too:
 
+```powershell
     PS C:\> .\entropy.exe C:\Users\User\Downloads\*
     7.92 C:\Users\User\Downloads\1.jpeg
     8.00 C:\Users\User\Downloads\setup.exe
@@ -37,7 +39,8 @@ Shell expansion is supported too:
     4.66 C:\Users\User\Downloads\data.bin
     7.99 C:\Users\User\Downloads\pic.png
     4.07 C:\Users\User\Downloads\budget.xls
-
+```
+
 From the above output one could say `/bin/ls` is not packed, `1.jpeg` uses compression, `setup.exe` is compressed, `nov.pptx` is compressed (yup, these modern MS Office files are all
-ZIP files indeed), `data.bin` is not compressed, etc. Is that garuanteed? No, it's just math.
+ZIP files indeed), `data.bin` is not compressed, etc. Is that guaranteed? No, it's just math.
 :nerd_face:
@@ -48,6 +51,8 @@ ZIP files indeed), `data.bin` is not compressed, etc. Is that garuanteed? No, it
 
 Clone the repo:
 
+```bash
     $ git clone https://github.com/merces/entropy.git
     $ cd entropy
+```
 
@@ -57,11 +62,13 @@ If you have CMake installed, build with:
     $ cd build
     $ cmake ..
     $ make
 
 Or if you don't, just use `g++`:
 
+```bash
     $ g++ -std=c++20 -o entropy entropy.cpp
-
+```
+
 ### Windows
 
 If you use a recent Visual Studio version, you can clone this repository and open the `CMakeLists.txt` here
diff --git a/entropy.cpp b/entropy.cpp
--- a/entropy.cpp
+++ b/entropy.cpp
@@ -6,9 +6,9 @@
 
 double calculate_entropy(const unsigned int counted_bytes[256], const std::streamsize total_length) {
     double entropy = 0.;
 
     for (int i = 0; i < 256; i++) {
-        double temp = (double)counted_bytes[i] / total_length;
+        const double temp = static_cast<double>(counted_bytes[i]) / total_length;
 
         if (temp > 0.) {
             entropy += temp * fabs(log2(temp));
@@ -19,9 +19,9 @@ double calculate_entropy(const unsigned int counted_bytes[256], const std::strea
 }
 
 void usage() {
-    std::cout << "entropy calculates the entropy of files, but you need to provide it with a file. :)\n\n" <<
-        "Usage:\n\tentropy FILE\n\n" <<
-        "Examples:\n\tentropy image.png\n\tentropy music.mp3 document.xls\n\tentropy *.exe\n\n" <<
+    std::cout << "entropy calculates the entropy of files, but you need to provide it with a file. :)\n\n"
+        "Usage:\n\tentropy FILE\n\n"
+        "Examples:\n\tentropy image.png\n\tentropy music.mp3 document.xls\n\tentropy *.exe\n\n"
         "For more information and bug reporting, refer to https://github.com/merces/entropy\n";
 }
 
@@ -35,40 +35,43 @@ int main(int argc, char *argv[])
     // Entropy will have two decimal places
     std::cout << std::fixed << std::setprecision(2);
 
+    // 16KB chunks; the buffer is allocated once and reused for every file
+    std::vector<char> buff(1024*16, 0);
+
     for (int i = 1; i < argc; i++) {
         // Skip directories, symlinks, etc
         if (!std::filesystem::is_regular_file(argv[i])) {
+            std::cerr << "\"" << argv[i] << "\" isn't a regular file, skipping.\n";
             continue;
         }
 
         // Open the file
-        std::ifstream f(argv[i], std::ios::binary);
-        if (f.fail()) {
-            std::cerr << "Could not open \"" << argv[i] << "\" for reading.\n";
+        std::ifstream input_file(argv[i], std::ios::binary);
+        if (input_file.fail()) {
+            std::cerr << "Couldn't open \"" << argv[i] << "\" for reading.\n";
             continue;
         }
 
-        // 16KB chunks
-        std::vector<char> buff(1024*16, 0);
+        // Per-file state: the total and the histogram must start from zero for every file
         std::streamsize total_bytes_read = 0;
 
         // Count occurrence of each possible byte, from zero to 255.
         unsigned int counted_bytes[256] = { 0 };
 
         // Read file in chunks and count the occurrences of each possible byte (0-255)
-        while (!f.eof()) {
-            f.read(buff.data(), buff.size());
-            auto bytes_read = f.gcount();
+        // Loop on the stream state rather than eof() so a read error cannot spin forever
+        while (input_file) {
+            input_file.read(buff.data(), buff.size());
+            const auto bytes_read = input_file.gcount();
             total_bytes_read += bytes_read;
             for (int j = 0; j < bytes_read; j++) {
-                unsigned char c = buff[j];
-                counted_bytes[c]++;
+                const auto byte = static_cast<unsigned char>(buff[j]);
+                counted_bytes[byte]++;
             }
         }
-
-        f.close();
+        input_file.close();
 
         std::cout << calculate_entropy(counted_bytes, total_bytes_read) << " " << argv[i] << "\n";
     }
     return 0;
-}
\ No newline at end of file
+}