← Back to team overview

linuxdcpp-team team mailing list archive

[Branch ~dcplusplus-team/dcplusplus/trunk] Rev 3149: add a tool to hash files and gather some statistics

 

------------------------------------------------------------
revno: 3149
committer: poy <poy@xxxxxxxxxx>
branch nick: trunk
timestamp: Sat 2012-12-08 20:49:02 +0100
message:
  add a tool to hash files and gather some statistics
added:
  utils/hash.cpp


--
lp:dcplusplus
https://code.launchpad.net/~dcplusplus-team/dcplusplus/trunk

Your team Dcplusplus-team is subscribed to branch lp:dcplusplus.
To unsubscribe from this branch go to https://code.launchpad.net/~dcplusplus-team/dcplusplus/trunk/+edit-subscription
=== added file 'utils/hash.cpp'
--- utils/hash.cpp	1970-01-01 00:00:00 +0000
+++ utils/hash.cpp	2012-12-08 19:49:02 +0000
@@ -0,0 +1,119 @@
+// Tool to hash files using the interfaces provided by DC++ and gather some statistics.
+
+#include "base.h"
+
+#include <ctime>
+#include <iostream>
+
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics.hpp>
+
+#include <dcpp/File.h>
+#include <dcpp/FileReader.h>
+#include <dcpp/MerkleTree.h>
+#include <dcpp/TigerHash.h>
+
+using namespace std;
+using namespace dcpp;
+using namespace boost::accumulators;
+
+void help() {
+	cout << "Arguments to run hash with:" << endl << "\t hash <path> [count]" << endl
+		<< "<path> is the file to hash." << endl
+		<< "[count] (optional) is the amount of times the file should be hashed to establish statistics." << endl;
+}
+
+enum { Path = 1, LastCompulsory = Path, Count };
+
+struct Info {
+	string root;
+	clock_t time;
+	double speed;
+};
+
+Info run(const string& path) {
+	// adapted from dcpp::HashManager
+
+	File f { path, File::READ, File::OPEN };
+	auto size = f.getSize();
+	f.close();
+
+	static const int64_t MIN_BLOCK_SIZE = 64 * 1024;
+	auto bs = std::max(TigerTree::calcBlockSize(size, 10), MIN_BLOCK_SIZE);
+
+	TigerTree tt { bs };
+
+	auto start = clock();
+
+	FileReader(true).read(path, [&](const void* buf, size_t n) -> bool {
+		tt.update(buf, n);
+		return true;
+	});
+
+	tt.finalize();
+
+	auto end = clock();
+
+	double speed = 0.0;
+	if(end > start) {
+		speed = static_cast<double>(size) * 1000.0 / static_cast<double>(end - start);
+	}
+
+	Info ret { tt.getRoot().toBase32(), end - start, speed };
+	return ret;
+}
+
+int main(int argc, char* argv[]) {
+	if(argc <= LastCompulsory) {
+		help();
+		return 1;
+	}
+
+	string path { argv[Path] };
+
+	try {
+		cout << "Hashing <" << path << ">..." << endl;
+		auto info = run(path);
+		cout << "Hashed <" << path << ">:" << endl
+			<< "\tTTH: " << info.root << endl
+			<< "\tTime: " << info.time << " ms" << endl
+			<< "\tSpeed: " << info.speed << " bytes/s" << endl;
+	} catch(const FileException& e) {
+		cout << "Error reading <" << path << ">: " << e.getError() << endl;
+		help();
+		return 2;
+	}
+
+	if(argc > Count) {
+		auto count = Util::toUInt(argv[Count]);
+		if(count < 1) {
+			cout << "Error: invalid count (" << count << ")." << endl;
+			help();
+			return 1;
+		}
+
+		cout << "Hashing again " << count << " more times..." << endl;
+
+		accumulator_set<double, stats<tag::variance>> time;
+		accumulator_set<double, stats<tag::variance>> speed;
+
+		for(decltype(count) i = 0; i < count; ++i) {
+			try {
+				auto info = run(path);
+				time(info.time);
+				speed(info.speed);
+			} catch(const FileException&) { }
+		}
+
+		if(boost::accumulators::count(time) != count || boost::accumulators::count(speed) != count) {
+			cout << "Error: couldn't gather statistics for " << count << " runs." << endl;
+			return 3;
+		}
+
+		cout << "Statistics on " << count << " runs:" << endl
+			<< "\tTime: mean = " << mean(time) << " s, std dev = " << sqrt(variance(time)) << " s" << endl
+			<< "\tSpeed: mean = " << mean(speed) << " bytes/s, std dev = " << sqrt(variance(speed)) << " bytes/s" << endl;
+	}
+
+	return 0;
+}