1. 下載 vmware player
http://www.vmware.com/download/player/
2. 下載 hadoop vm img
http://code.google.com/edu/parallel/tools/hadoopvm/index.html
3. 安裝 vmware player 啟動 hadoop vm
4. 使用root 登入
id:root / password:root
5. 安裝 php
root# apt-get install php5-cli
6. 登出改用 guest 登入
id:guest / password:guest
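7. 用 vim 寫兩支 php (mapper.php 與 reducer.php) 放到 /home/guest
---vim /home/guest/mapper.php -------------------------------------------------------------------------------Start-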
#!/usr/bin/php
<?php
/**
 * mapper.php - map step of a word count job.
 *
 * Reads text lines from STDIN, lowercases them, splits them into words on
 * non-word characters, and counts how often each word occurs across the
 * whole input. Once STDIN is exhausted it writes one line per distinct
 * word to STDOUT in the form "word<TAB>count".
 */
$word2count = array();

// input comes from STDIN (standard input)
while (($line = fgets(STDIN)) !== false) {
    // remove leading and trailing whitespace and lowercase
    $line = strtolower(trim($line));

    // split the line into words; PREG_SPLIT_NO_EMPTY drops the empty
    // strings produced by consecutive separators
    $words = preg_split('/\W/', $line, 0, PREG_SPLIT_NO_EMPTY);

    // increase counters
    foreach ($words as $word) {
        // Initialise the counter the first time a word is seen. Applying
        // "+=" to a missing key raises an undefined index/key diagnostic,
        // which can end up mixed into the tab-delimited output.
        if (!isset($word2count[$word])) {
            $word2count[$word] = 0;
        }
        $word2count[$word] += 1;
    }
}

// write the results to STDOUT (standard output)
// what we output here will be the input for the
// Reduce step, i.e. the input for reducer.php
foreach ($word2count as $word => $count) {
    // tab-delimited
    echo $word, chr(9), $count, PHP_EOL;
}
?>
---vim /home/guest/mapper.php ------------------------------------------------------------------------------End-
---vim /home/guest/reducer.php -------------------------------------------------------------------------------Start-
#!/usr/bin/php
<?php
/**
 * reducer.php - reduce step of a word count job.
 *
 * Reads "word<TAB>count" lines from STDIN (the format mapper.php emits),
 * sums the counts per word, sorts the result by word, and writes one
 * "word<TAB>total" line per word to STDOUT.
 *
 * Lines without a tab separator, and lines whose count does not parse to
 * a positive integer, are ignored.
 */
$word2count = array();

// input comes from STDIN
while (($line = fgets(STDIN)) !== false) {
    // remove leading and trailing whitespace
    $line = trim($line);

    // parse the input we got from mapper.php
    $parts = explode(chr(9), $line);
    if (count($parts) < 2) {
        // No tab on this line (e.g. a blank line): there is no count to
        // read, so skip it instead of destructuring a missing element.
        continue;
    }
    list($word, $count) = $parts;

    // convert count (currently a string) to int
    $count = intval($count);

    // sum counts
    if ($count > 0) {
        // Initialise the total the first time a word is seen so "+=" never
        // touches a missing key.
        if (!isset($word2count[$word])) {
            $word2count[$word] = 0;
        }
        $word2count[$word] += $count;
    }
}

// sort the words by key
//
// this step is NOT required, we just do it so that our
// final output will look more like the official Hadoop
// word count examples
ksort($word2count);

// write the results to STDOUT (standard output)
foreach ($word2count as $word => $count) {
    // tab-delimited
    echo $word, chr(9), $count, PHP_EOL;
}
?>
---vim /home/guest/reducer.php -------------------------------------------------------------------------------End-
8. 修改權限後測測看
guest# chmod +x /home/guest/*.php
guest# echo "i love hadoop, hadoop love u" | /home/guest/mapper.php
guest# echo "i love hadoop, hadoop love u" | /home/guest/mapper.php | /home/guest/reducer.php
9. 直接利用裡面的資料來算 wordcount
/*把 txt file 整理到一個目錄裡*/
guest# mkdir /home/guest/test;cp /home/guest/hadoop/*.txt /home/guest/test
/*copy 到 hadoop file system*/
guest# hadoop dfs -copyFromLocal /home/guest/test test
/*查看有沒有拷貝成功*/
guest# hadoop dfs -ls test
/*執行 wordcount 運算*/
guest# hadoop jar /home/guest/hadoop/contrib/hadoop-streaming.jar -mapper /home/guest/mapper.php -reducer /home/guest/reducer.php -input test/* -output test-output
/*查看運算結果目錄*/
guest# hadoop dfs -ls test-output
/*dump 出運算結果*/
guest# hadoop dfs -cat test-output/part-00000 | more
10. 用 web 也可以看。
http://yourhostname:50030/
11. 參考
http://www.lunchpauze.com/2007/10/writing-hadoop-mapreduce-program-in-php.html


Leave a comment