On 06/06/2011 06:53 PM, Christian Siefkes wrote:
I've just seen that somebody already did the same, but I too tried to optimize your wordcount implementation a bit and got to the following Vala program:
Meanwhile I fixed the program so that it will actually show the correct
output, i.e. the same line, word, and byte counts that wc shows. (The byte
count will still be off if the file uses DOS-style instead of Unix-style
line endings, but it's a start.) The need to call char.isspace() (checking
for spaces and tabs) instead of just checking whether the char is a space
slows things down a bit:
$ time /usr/bin/wc ../shakespeare.txt
124456 901325 5582655 ../shakespeare.txt
real 0m0.253s
user 0m0.248s
sys 0m0.000s
$ time wc2 ../shakespeare.txt
124456 901325 5582655 ../shakespeare.txt
real 0m0.337s
user 0m0.320s
sys 0m0.008s
That's about 30% slower than the C version--still quite acceptable, I feel.
The modified program is appended below.
Best regards
Christian
namespace WordCount {
/**
 * Counts the words in a string.
 *
 * A word is a maximal run of characters for which char.isspace() is
 * false. Leading, trailing and repeated whitespace is ignored, so an
 * empty or all-whitespace string contains zero words.
 *
 * @param str the text to scan (typically a single line)
 * @return the number of words found in str
 */
private int count_words(string str) {
    int result = 0;
    bool in_word = false;
    // string.length is a strlen() call; evaluate it once instead of in
    // the loop condition so the scan stays linear in the line length.
    int len = str.length;
    for (int i = 0; i < len; i++) {
        if (str[i].isspace()) {
            // whitespace ends the current word, if any
            in_word = false;
        } else if (!in_word) {
            // first non-space character after whitespace: a new word begins
            in_word = true;
            result++;
        }
    }
    return result;
}
/**
 * Prints line, word and byte counts for every file named on the
 * command line, one output row per file in the layout used by wc.
 *
 * A file that cannot be opened or read is reported on stderr and the
 * remaining files are still processed.
 *
 * @param args program name followed by the paths of the files to count
 * @return 0 if every file was processed, 1 if at least one file failed
 */
public int main(string[] args) {
    // Becomes non-zero as soon as one file fails, so that callers can
    // detect the failure from the exit status.
    int status = 0;
    for (int i = 1; i < args.length; i++) {
        File file = File.new_for_path(args[i]);
        try {
            var dis = new DataInputStream(file.read());
            // 64-bit counters: a 32-bit byte count overflows at 2 GiB.
            int64 wc = 0, lc = 0, bc = 0;
            string? line;
            while ((line = dis.read_line(null)) != null) {
                lc++;
                // read_line() returns the line without its terminator,
                // so one byte is added back for it.
                // NOTE(review): this presumably over-counts lines and
                // bytes by one when the last line has no trailing
                // newline - confirm against wc.
                bc += line.length + 1;
                wc += count_words(line);
            }
            // print line + word + byte count + filename (like wc)
            stdout.printf("%7s %7s %7s %s\n",
                          lc.to_string(), wc.to_string(), bc.to_string(), args[i]);
        } catch (Error e) {
            stderr.printf("Error processing file '%s': %s\n",
                          file.get_path(), e.message );
            status = 1;
        }
    }
    return status;
}
}
--
|------- Dr. Christian Siefkes ------- christian siefkes net -------
| Homepage: http://www.siefkes.net/ | Blog: http://www.keimform.de/
| Peer Production Everywhere: http://peerconomy.org/wiki/
|---------------------------------- OpenPGP Key ID: 0x346452D8 --
Politics is for people who have a passion for changing life but lack a
passion for living it.
-- Tom Robbins, Even Cowgirls Get the Blues
Attachment:
signature.asc
Description: OpenPGP digital signature