2016-11-12
Perlでk-meansクラスタリング
完全にランダムなエリアから重心を選ぶと計算がうまくいかなかったので、ランダムにサンプルを抽出した。
TODO:任意のkを与えられるようにする
kmeans.pl
use strict; use warnings; use Moo; use feature qw(say); use Switch; use Data::Dumper; use List::Util qw(first max maxstr min minstr reduce shuffle sum); use POSIX; # k-means clustering # k=3 # iris data from R # initialize constant my $loop_count = 0; my $smax=0; my $smin=1000; #should be large enough my $lmax=0; my $lmin=1000; #should be large enough my $global_max; my $global_min; my @points; sub read_file(){ open(IN, "< ./dat/irisdata.txt"); while(<IN>){ if($loop_count==0){ $loop_count++; next; } my @array = split('\s',$_); push(@array,0); # this is label (=k) $smax=max($smax,$array[0]); $smin=min($smin,$array[0]); $lmax=max($lmax,$array[1]); $lmin=min($lmin,$array[1]); push(@points, \@array); } close(IN); $global_max = max($smax,$lmax); $global_min = min($smin,$lmin); } my @centers; sub select_random_center{ for(my $i=0;$i<3;$i++){ my $xc = rand($global_max-$global_min)+$global_min; my $yc = rand($global_max-$global_min)+$global_min; my $pair = [$xc,$yc]; @centers[$i] = $pair; } } sub select_center_from_sample{ my $length = $#points+1; for(my $i=0;$i<3;$i++){ my $xc = $points[floor(rand($length))][0]; my $yc = $points[floor(rand($length))][1]; my $pair = [$xc,$yc]; @centers[$i] = $pair; } } sub assign_label{ foreach my $point(@points){ my $xp = $point->[0]; my $yp = $point->[1]; my $label = 0; my $min_norm = $global_max**2; # should be large enough foreach my $center(@centers){ my $xc = $center->[0]; my $yc = $center->[1]; my $norm = sqrt(($xp-$xc)**2 + ($yp-$yc)**2); if($norm<$min_norm){ $point->[3] = $label; $min_norm = $norm; } $label++; } } } sub recalculate_center(){ my @x0s; my @y0s; my @x1s; my @y1s; my @x2s; my @y2s; foreach my $point(@points){ switch($point->[3]){ case 0 { push(@x0s,$point->[0]); push(@y0s,$point->[1]); } case 1 { push(@x1s,$point->[0]); push(@y1s,$point->[1]); } case 2 { push(@x2s,$point->[0]); push(@y2s,$point->[1]); } } } my $new_x0; my $new_y0; my $count; $count = $#x0s + 1; if($count>0){ for (my $i=0;$i<$count;$i++){ $new_x0 += $x0s[$i]; $new_y0 += $y0s[$i]; } $new_x0 = $new_x0 / $count; $new_y0 = $new_y0 / $count; my $new_pair0 = [$new_x0,$new_y0]; @centers[0] = $new_pair0; } my $new_x1; my $new_y1; $count = $#x1s + 1; if($count>0){ for (my $i=0;$i<$count;$i++){ $new_x1 += $x1s[$i]; $new_y1 += $y1s[$i]; } $new_x1 = $new_x1 / $count; $new_y1 = $new_y1 / $count; my $new_pair1 = [$new_x1,$new_y1]; @centers[1] = $new_pair1; } my $new_x2; my $new_y2; $count = $#x2s + 1; if($count>0){ for (my $i=0;$i<$count;$i++){ $new_x2 += $x2s[$i]; $new_y2 += $y2s[$i]; } $new_x2 = $new_x2 / $count; $new_y2 = $new_y2 / $count; my $new_pair2 = [$new_x2,$new_y2]; @centers[2] = $new_pair2; } } sub write_to_file(){ open(OUT,'> result.csv'); foreach my $point(@points){ print OUT $point->[0] . ','; print OUT $point->[1] . ','; print OUT $point->[2] . ','; print OUT $point->[3] . "\n"; } close(OUT); open(OUT,'> center.csv'); foreach my $center(@centers){ print OUT $center->[0] . ','; print OUT $center->[1] . ','; print OUT "center" . "\n"; } close(OUT); } sub k_means(){ read_file(); # select_random_center(); select_center_from_sample(); # loop for 1000 times for(my $i=0;$i<1000;$i++){ assign_label(); recalculate_center(); say "loop $i has finished"; } say 'computation done.'; say Dumper @centers; write_to_file(); } k_means();
dat/irisdata.txt
"iris.Sepal.Length" "iris.Petal.Length" "iris.Species" 5.1 1.4 "setosa" 4.9 1.4 "setosa" 4.7 1.3 "setosa" 4.6 1.5 "setosa" 5 1.4 "setosa" 5.4 1.7 "setosa" 4.6 1.4 "setosa" 5 1.5 "setosa" 4.4 1.4 "setosa" 4.9 1.5 "setosa" 5.4 1.5 "setosa" 4.8 1.6 "setosa" 4.8 1.4 "setosa" 4.3 1.1 "setosa" 5.8 1.2 "setosa" 5.7 1.5 "setosa" 5.4 1.3 "setosa" 5.1 1.4 "setosa" 5.7 1.7 "setosa" 5.1 1.5 "setosa" 5.4 1.7 "setosa" 5.1 1.5 "setosa" 4.6 1 "setosa" 5.1 1.7 "setosa" 4.8 1.9 "setosa" 5 1.6 "setosa" 5 1.6 "setosa" 5.2 1.5 "setosa" 5.2 1.4 "setosa" 4.7 1.6 "setosa" 4.8 1.6 "setosa" 5.4 1.5 "setosa" 5.2 1.5 "setosa" 5.5 1.4 "setosa" 4.9 1.5 "setosa" 5 1.2 "setosa" 5.5 1.3 "setosa" 4.9 1.4 "setosa" 4.4 1.3 "setosa" 5.1 1.5 "setosa" 5 1.3 "setosa" 4.5 1.3 "setosa" 4.4 1.3 "setosa" 5 1.6 "setosa" 5.1 1.9 "setosa" 4.8 1.4 "setosa" 5.1 1.6 "setosa" 4.6 1.4 "setosa" 5.3 1.5 "setosa" 5 1.4 "setosa" 7 4.7 "versicolor" 6.4 4.5 "versicolor" 6.9 4.9 "versicolor" 5.5 4 "versicolor" 6.5 4.6 "versicolor" 5.7 4.5 "versicolor" 6.3 4.7 "versicolor" 4.9 3.3 "versicolor" 6.6 4.6 "versicolor" 5.2 3.9 "versicolor" 5 3.5 "versicolor" 5.9 4.2 "versicolor" 6 4 "versicolor" 6.1 4.7 "versicolor" 5.6 3.6 "versicolor" 6.7 4.4 "versicolor" 5.6 4.5 "versicolor" 5.8 4.1 "versicolor" 6.2 4.5 "versicolor" 5.6 3.9 "versicolor" 5.9 4.8 "versicolor" 6.1 4 "versicolor" 6.3 4.9 "versicolor" 6.1 4.7 "versicolor" 6.4 4.3 "versicolor" 6.6 4.4 "versicolor" 6.8 4.8 "versicolor" 6.7 5 "versicolor" 6 4.5 "versicolor" 5.7 3.5 "versicolor" 5.5 3.8 "versicolor" 5.5 3.7 "versicolor" 5.8 3.9 "versicolor" 6 5.1 "versicolor" 5.4 4.5 "versicolor" 6 4.5 "versicolor" 6.7 4.7 "versicolor" 6.3 4.4 "versicolor" 5.6 4.1 "versicolor" 5.5 4 "versicolor" 5.5 4.4 "versicolor" 6.1 4.6 "versicolor" 5.8 4 "versicolor" 5 3.3 "versicolor" 5.6 4.2 "versicolor" 5.7 4.2 "versicolor" 5.7 4.2 "versicolor" 6.2 4.3 "versicolor" 5.1 3 "versicolor" 5.7 4.1 "versicolor" 6.3 6 "virginica" 5.8 5.1 "virginica" 7.1 5.9 "virginica" 6.3 5.6 "virginica" 6.5 5.8 "virginica" 7.6 6.6 "virginica" 4.9 4.5 "virginica" 7.3 6.3 "virginica" 6.7 5.8 "virginica" 7.2 6.1 "virginica" 6.5 5.1 "virginica" 6.4 5.3 "virginica" 6.8 5.5 "virginica" 5.7 5 "virginica" 5.8 5.1 "virginica" 6.4 5.3 "virginica" 6.5 5.5 "virginica" 7.7 6.7 "virginica" 7.7 6.9 "virginica" 6 5 "virginica" 6.9 5.7 "virginica" 5.6 4.9 "virginica" 7.7 6.7 "virginica" 6.3 4.9 "virginica" 6.7 5.7 "virginica" 7.2 6 "virginica" 6.2 4.8 "virginica" 6.1 4.9 "virginica" 6.4 5.6 "virginica" 7.2 5.8 "virginica" 7.4 6.1 "virginica" 7.9 6.4 "virginica" 6.4 5.6 "virginica" 6.3 5.1 "virginica" 6.1 5.6 "virginica" 7.7 6.1 "virginica" 6.3 5.6 "virginica" 6.4 5.5 "virginica" 6 4.8 "virginica" 6.9 5.4 "virginica" 6.7 5.6 "virginica" 6.9 5.1 "virginica" 5.8 5.1 "virginica" 6.8 5.9 "virginica" 6.7 5.7 "virginica" 6.7 5.2 "virginica" 6.3 5 "virginica" 6.5 5.2 "virginica" 6.2 5.4 "virginica" 5.9 5.1 "virginica"
Perlで多次元配列
http://www.kent-web.com/perl/chap3.html#chap3_7
#多次元配列 @array = ([1,2],[3,4]); #アクセス print $array[0][1];
Perlの配列/ハッシュの初期化と配列/ハッシュのリファレンス宣言
そういうことだったのか
# 配列の初期化 @array = (1,2,3); # ハッシュの初期化 %hash = (a=>1,b=>2,c=>3); # 配列のリファレンス $array_reference = [1,2,3]; # デリファレンス print $array_reference->[0] # ハッシュのリファレンス $hash_reference = {a=>1,b=2,c=3}; #デリファレンス print $hash_reference->{'a'};
2012-01-13
GD::GraphのCPANからのインストールに向けて
できない。
# sudo perl -MCPAN -e shell # install GD::Graph
すると、FTPが失敗する。
Can't use an undefined value as a symbol reference at /System/Library/Perl/5.12/Net/FTP/dataconn.pm line 54.
フォーラム読んでソースを修正してみたけど動かず。
仕方ないからCPANMに切り替えるも
[@ GD-2.46]$sudo cpanm libgd ! Finding libgd on cpanmetadb failed. ! Finding libgd on search.cpan.org failed. ! Finding libgd () on mirror http://search.cpan.org/CPAN failed. ! Couldn't find module or a distribution libgd () [@aoki-mba GD-2.46]$cpan GD::Graph Going to read '/Users/shohei/.cpan/Metadata' Database was generated on Sun, 22 May 2011 13:37:59 GMT Warning: You are not allowed to write into directory "/Users/shohei/.cpan/sources/authors". I'll continue, but if you encounter problems, they may be due to insufficient permissions. Fetching with LWP: ftp://ftp.u-aizu.ac.jp/pub/CPAN/authors/01mailrc.txt.gz LWP failed with code[500] message[Can't use an undefined value as a symbol reference] Fetching with Net::FTP: ftp://ftp.u-aizu.ac.jp/pub/CPAN/authors/01mailrc.txt.gz Can't use an undefined value as a symbol reference at /System/Library/Perl/5.12/Net/FTP/dataconn.pm line 54.
よく覚えてないけど
- libgd-configが見つからない
- libgd 2.0.28 or higherをダウンロードしろ
というエラーも出ている。
ソースからgdのインストールを試みる。
http://kempwire.com/tips/installing-gd-graphics-library-on-mac-os-x.html
- zlib
- libpng
- jpegsrc.v6b
- freetype-2
などをインストールしておく。
肝心のgdは開発中止なのか知らないけどどこにもない。
bitbucketに上がってるのをcloneする(要Mercurialのインストール)
※ちなみにここ(http://www.dmxzone.com/go?6824)を参考にして、
http://www.boutell.com/gd/http/gd-2.0.28.tar.gz
って手打ちしたらソースが落ちてきた。とっていいのかは知らん。
んでGDのインストール。
できたっぽい。
そこで再度cpanmを試みるも、
[@ GD-2.46]$sudo cpanm GD::Graph --> Working on GD::Graph Fetching http://search.cpan.org/CPAN/authors/id/B/BW/BWARFIELD/GDGraph-1.44.tar.gz ... OK Configuring GDGraph-1.44 ... OK ==> Found dependencies: GD::Text --> Working on GD::Text Fetching http://search.cpan.org/CPAN/authors/id/M/MV/MVERB/GDTextUtil-0.86.tar.gz ... OK Configuring GDTextUtil-0.86 ... OK Building and testing GDTextUtil-0.86 ... FAIL ! Installing GD::Text failed. See /Users/shohei/.cpanm/build.log for details. ! Bailing out the installation for GDGraph-1.44. Retry with --prompt or --force.
/Users/<homeuser>/.cpanm/build.log ファイルの中身
Test Summary Report$ 90 -------------------$ 91 t/align.t (Wstat: 512 Tests: 0 Failed: 0)$ 92 Non-zero exit status: 2$ 93 Parse errors: Bad plan. You planned 21 tests but ran 0.$ 94 t/text.t (Wstat: 512 Tests: 0 Failed: 0)$ 95 Non-zero exit status: 2$ 96 Parse errors: Bad plan. You planned 20 tests but ran 0.$ 97 t/wrap.t (Wstat: 512 Tests: 0 Failed: 0)$ 98 Non-zero exit status: 2$ 99 Parse errors: Bad plan. You planned 14 tests but ran 0.$ 100 Files=3, Tests=0, 0 wallclock secs ( 0.03 usr 0.01 sys + 0.17 cusr 0 .03 csys = 0.24 CPU)$ 101 Result: FAIL$ 102 Failed 3/3 test programs. 0/0 subtests failed.$ 103 make: *** [test_dynamic] Error 2$ 104 -> FAIL Installing GD::Text failed. See /Users/shohei/.cpanm/build.log f or details.$ 105 -> FAIL Bailing out the installation for GDGraph-1.44. Retry with --prom pt or --force.$
まとめると、自分の環境では、
・cpanmを使う(CPANシェルがNet::FTPで引っかかる)
・GDは知らん
ということで。
OSX lionにperlbrewをインストール
http://blog.kiftwi.net/2011/08/02/os-x-lion%E3%81%ABperlbrew%E3%81%A8cpanm%E3%82%92%E3%82%A4%E3%83%B3%E3%82%B9%E3%83%88%E3%83%BC%E3%83%AB%E3%81%97%E3%81%A6perl%E7%92%B0%E5%A2%83%E3%82%92%E6%95%B4%E3%81%88%E3%82%8B/
CPANが使えない件、バグかも。
http://www.gossamer-threads.com/lists/perl/porters/260169
/System/Library/Perl/5.12/Net/FTP/dataconn.pm
line 54. あたりを編集
sub _close { my $data = shift; my $ftp = ${*$data}{'net_ftp_cmd'}; $data->SUPER::close(); return if !defined $ftp; #Here is added code delete ${*$ftp}{'net_ftp_dataconn'} if exists ${*$ftp}{'net_ftp_dataconn'} && $data == ${*$ftp}{'net_ftp_dataconn'}; } sub close { my $data = shift; my $ftp = ${*$data}{'net_ftp_cmd'}; if(exists ${*$data}{'net_ftp_bytesread'} && !${*$data}{'net_ftp_eof'}) { my $junk; $data->read($junk,1,0); return $data->abort unless ${*$data}{'net_ftp_eof'}; } $data->_close; return if !defined $ftp; #Here is added code $ftp->response() == CMD_OK && $ftp->message =~ /unique file name:\s*(\S*)\s*\)/ && (${*$ftp}{'net_ftp_unique'} = $1); $ftp->status == CMD_OK; }