how to code something このページをアンテナに追加 RSSフィード

2016-11-12

Perlでk-meansクラスタリング

完全にランダムなエリアから重心を選ぶと計算がうまくいかなかったので、ランダムにサンプルを抽出した。
f:id:seinzumtode:20161112170340p:image

TODO:任意のkを与えられるようにする
kmeans.pl

use strict;
use warnings;

use Moo;
use feature qw(say);
use Switch;
use Data::Dumper;
use List::Util qw(first max maxstr min minstr reduce shuffle sum);
use POSIX;

# k-means clustering
# k=3
# iris data from R

# initialize constant
my $loop_count = 0;
my $smax=0;
my $smin=1000; #should be large enough
my $lmax=0;
my $lmin=1000; #should be large enough
my $global_max; 
my $global_min; 

my @points;
sub read_file(){
    open(IN, "< ./dat/irisdata.txt");
	while(<IN>){
		if($loop_count==0){
			$loop_count++; 
			next;
		}
		my @array = split('\s',$_);
    push(@array,0); # this is label (=k)

    $smax=max($smax,$array[0]);
    $smin=min($smin,$array[0]);
    $lmax=max($lmax,$array[1]);
    $lmin=min($lmin,$array[1]);

    push(@points, \@array);
}
close(IN);
$global_max = max($smax,$lmax);
$global_min = min($smin,$lmin);
}

my @centers; 

sub select_random_center{
	for(my $i=0;$i<3;$i++){
	  my $xc = rand($global_max-$global_min)+$global_min;
	  my $yc = rand($global_max-$global_min)+$global_min;
	  my $pair = [$xc,$yc];
	  @centers[$i] = $pair;
	}
}

sub select_center_from_sample{
	my $length = $#points+1;
	for(my $i=0;$i<3;$i++){
	  my $xc = $points[floor(rand($length))][0];
	  my $yc = $points[floor(rand($length))][1];
	  my $pair = [$xc,$yc];
	  @centers[$i] = $pair;
	}
}

sub assign_label{
	foreach my $point(@points){
		my $xp = $point->[0];
		my $yp = $point->[1];
		my $label = 0;
		my $min_norm = $global_max**2; # should be large enough
		foreach my $center(@centers){
			my $xc = $center->[0];
			my $yc = $center->[1];
			my $norm = sqrt(($xp-$xc)**2 + ($yp-$yc)**2);
			if($norm<$min_norm){
				$point->[3] = $label;
				$min_norm = $norm;
			}
			$label++;
		}
	}
}

sub recalculate_center(){
	my @x0s;
	my @y0s;
	my @x1s;
	my @y1s;
	my @x2s;
	my @y2s;
	foreach my $point(@points){
		switch($point->[3]){
			case 0 {
			  push(@x0s,$point->[0]);
			  push(@y0s,$point->[1]);
			}
			case 1 {
			  push(@x1s,$point->[0]);
			  push(@y1s,$point->[1]);
			}
			case 2 {
			  push(@x2s,$point->[0]);
			  push(@y2s,$point->[1]);
			}
		}
	}

	my $new_x0;
	my $new_y0;
	my $count;
  	$count = $#x0s + 1;
	if($count>0){
	  for (my $i=0;$i<$count;$i++){
		$new_x0 += $x0s[$i];
		$new_y0 += $y0s[$i];
	  }
	  $new_x0 = $new_x0 / $count;
	  $new_y0 = $new_y0 / $count;
	  my $new_pair0 = [$new_x0,$new_y0];
	  @centers[0] = $new_pair0;
	}

	my $new_x1;
	my $new_y1;
	$count = $#x1s + 1;
	if($count>0){
	  for (my $i=0;$i<$count;$i++){
		  $new_x1 += $x1s[$i];
		  $new_y1 += $y1s[$i];
	  }
	  $new_x1 = $new_x1 / $count;
	  $new_y1 = $new_y1 / $count;
	  my $new_pair1 = [$new_x1,$new_y1];
	  @centers[1] = $new_pair1;
	} 

	my $new_x2;
	my $new_y2;
	$count = $#x2s + 1;
	if($count>0){
	  for (my $i=0;$i<$count;$i++){
		  $new_x2 += $x2s[$i];
		  $new_y2 += $y2s[$i];
	  }
	  $new_x2 = $new_x2 / $count;
	  $new_y2 = $new_y2 / $count;
	  my $new_pair2 = [$new_x2,$new_y2];
	  @centers[2] = $new_pair2;
	} 
}

sub write_to_file(){
	open(OUT,'> result.csv');

	foreach my $point(@points){
	  print OUT $point->[0] . ',';	
	  print OUT $point->[1] . ',';	
	  print OUT $point->[2] . ',';	
	  print OUT $point->[3] . "\n";	
	}
	close(OUT);

	open(OUT,'> center.csv');

	foreach my $center(@centers){
	  print OUT $center->[0] . ',';	
	  print OUT $center->[1] . ',';	
	  print OUT "center" . "\n";	
	}
	close(OUT);
}

sub k_means(){
  read_file();
  # select_random_center();
  select_center_from_sample();
  # loop for 1000 times
	for(my $i=0;$i<1000;$i++){
	  assign_label();
	  recalculate_center();
	  say "loop $i has finished";
	}

	say 'computation done.';
	say Dumper @centers;

	write_to_file();
}

k_means();


dat/irisdata.txt

"iris.Sepal.Length" "iris.Petal.Length" "iris.Species"
5.1 1.4 "setosa"
4.9 1.4 "setosa"
4.7 1.3 "setosa"
4.6 1.5 "setosa"
5 1.4 "setosa"
5.4 1.7 "setosa"
4.6 1.4 "setosa"
5 1.5 "setosa"
4.4 1.4 "setosa"
4.9 1.5 "setosa"
5.4 1.5 "setosa"
4.8 1.6 "setosa"
4.8 1.4 "setosa"
4.3 1.1 "setosa"
5.8 1.2 "setosa"
5.7 1.5 "setosa"
5.4 1.3 "setosa"
5.1 1.4 "setosa"
5.7 1.7 "setosa"
5.1 1.5 "setosa"
5.4 1.7 "setosa"
5.1 1.5 "setosa"
4.6 1 "setosa"
5.1 1.7 "setosa"
4.8 1.9 "setosa"
5 1.6 "setosa"
5 1.6 "setosa"
5.2 1.5 "setosa"
5.2 1.4 "setosa"
4.7 1.6 "setosa"
4.8 1.6 "setosa"
5.4 1.5 "setosa"
5.2 1.5 "setosa"
5.5 1.4 "setosa"
4.9 1.5 "setosa"
5 1.2 "setosa"
5.5 1.3 "setosa"
4.9 1.4 "setosa"
4.4 1.3 "setosa"
5.1 1.5 "setosa"
5 1.3 "setosa"
4.5 1.3 "setosa"
4.4 1.3 "setosa"
5 1.6 "setosa"
5.1 1.9 "setosa"
4.8 1.4 "setosa"
5.1 1.6 "setosa"
4.6 1.4 "setosa"
5.3 1.5 "setosa"
5 1.4 "setosa"
7 4.7 "versicolor"
6.4 4.5 "versicolor"
6.9 4.9 "versicolor"
5.5 4 "versicolor"
6.5 4.6 "versicolor"
5.7 4.5 "versicolor"
6.3 4.7 "versicolor"
4.9 3.3 "versicolor"
6.6 4.6 "versicolor"
5.2 3.9 "versicolor"
5 3.5 "versicolor"
5.9 4.2 "versicolor"
6 4 "versicolor"
6.1 4.7 "versicolor"
5.6 3.6 "versicolor"
6.7 4.4 "versicolor"
5.6 4.5 "versicolor"
5.8 4.1 "versicolor"
6.2 4.5 "versicolor"
5.6 3.9 "versicolor"
5.9 4.8 "versicolor"
6.1 4 "versicolor"
6.3 4.9 "versicolor"
6.1 4.7 "versicolor"
6.4 4.3 "versicolor"
6.6 4.4 "versicolor"
6.8 4.8 "versicolor"
6.7 5 "versicolor"
6 4.5 "versicolor"
5.7 3.5 "versicolor"
5.5 3.8 "versicolor"
5.5 3.7 "versicolor"
5.8 3.9 "versicolor"
6 5.1 "versicolor"
5.4 4.5 "versicolor"
6 4.5 "versicolor"
6.7 4.7 "versicolor"
6.3 4.4 "versicolor"
5.6 4.1 "versicolor"
5.5 4 "versicolor"
5.5 4.4 "versicolor"
6.1 4.6 "versicolor"
5.8 4 "versicolor"
5 3.3 "versicolor"
5.6 4.2 "versicolor"
5.7 4.2 "versicolor"
5.7 4.2 "versicolor"
6.2 4.3 "versicolor"
5.1 3 "versicolor"
5.7 4.1 "versicolor"
6.3 6 "virginica"
5.8 5.1 "virginica"
7.1 5.9 "virginica"
6.3 5.6 "virginica"
6.5 5.8 "virginica"
7.6 6.6 "virginica"
4.9 4.5 "virginica"
7.3 6.3 "virginica"
6.7 5.8 "virginica"
7.2 6.1 "virginica"
6.5 5.1 "virginica"
6.4 5.3 "virginica"
6.8 5.5 "virginica"
5.7 5 "virginica"
5.8 5.1 "virginica"
6.4 5.3 "virginica"
6.5 5.5 "virginica"
7.7 6.7 "virginica"
7.7 6.9 "virginica"
6 5 "virginica"
6.9 5.7 "virginica"
5.6 4.9 "virginica"
7.7 6.7 "virginica"
6.3 4.9 "virginica"
6.7 5.7 "virginica"
7.2 6 "virginica"
6.2 4.8 "virginica"
6.1 4.9 "virginica"
6.4 5.6 "virginica"
7.2 5.8 "virginica"
7.4 6.1 "virginica"
7.9 6.4 "virginica"
6.4 5.6 "virginica"
6.3 5.1 "virginica"
6.1 5.6 "virginica"
7.7 6.1 "virginica"
6.3 5.6 "virginica"
6.4 5.5 "virginica"
6 4.8 "virginica"
6.9 5.4 "virginica"
6.7 5.6 "virginica"
6.9 5.1 "virginica"
5.8 5.1 "virginica"
6.8 5.9 "virginica"
6.7 5.7 "virginica"
6.7 5.2 "virginica"
6.3 5 "virginica"
6.5 5.2 "virginica"
6.2 5.4 "virginica"
5.9 5.1 "virginica"

Perlで多次元配列

http://www.kent-web.com/perl/chap3.html#chap3_7

#多次元配列
@array = ([1,2],[3,4]);
#アクセス
print $array[0][1];

Perlの配列/ハッシュの初期化と配列/ハッシュのリファレンス宣言

そういうことだったのか 

# 配列の初期化
@array = (1,2,3);
# ハッシュの初期化
%hash = (a=>1,b=>2,c=>3);
# 配列のリファレンス
$array_reference = [1,2,3]; 
 # デリファレンス
   print $array_reference->[0]
# ハッシュのリファレンス
$hash_reference = {a=>1,b=2,c=3};
 #デリファレンス
 print $hash_reference->{'a'};

2012-01-13

GD::GraphのCPANからのインストールに向けて

できない。

# sudo perl -MCPAN -e shell
# install GD::Graph

すると、FTPが失敗する。
Can't use an undefined value as a symbol reference at /System/Library/Perl/5.12/Net/FTP/dataconn.pm line 54.
フォーラム読んでソースを修正してみたけど動かず。

仕方ないからCPANMに切り替えるも

[@ GD-2.46]$sudo cpanm libgd
! Finding libgd on cpanmetadb failed.
! Finding libgd on search.cpan.org failed.
! Finding libgd () on mirror http://search.cpan.org/CPAN failed.
! Couldn't find module or a distribution libgd ()
[@aoki-mba GD-2.46]$cpan GD::Graph
Going to read '/Users/shohei/.cpan/Metadata'
  Database was generated on Sun, 22 May 2011 13:37:59 GMT
Warning: You are not allowed to write into directory "/Users/shohei/.cpan/sources/authors".
    I'll continue, but if you encounter problems, they may be due
    to insufficient permissions.
Fetching with LWP:
ftp://ftp.u-aizu.ac.jp/pub/CPAN/authors/01mailrc.txt.gz
LWP failed with code[500] message[Can't use an undefined value as a symbol reference]
Fetching with Net::FTP:
ftp://ftp.u-aizu.ac.jp/pub/CPAN/authors/01mailrc.txt.gz
Can't use an undefined value as a symbol reference at /System/Library/Perl/5.12/Net/FTP/dataconn.pm line 54.

よく覚えてないけど

というエラーも出ている。

ソースからgdインストールを試みる。
http://kempwire.com/tips/installing-gd-graphics-library-on-mac-os-x.html

などをインストールしておく。

肝心のgdは開発中止なのか知らないけどどこにもない。
bitbucketに上がってるのをcloneする(要Mercurialインストール
※ちなみにここ(http://www.dmxzone.com/go?6824)を参考にして、



http://www.boutell.com/gd/http/gd-2.0.28.tar.gz


って手打ちしたらソースが落ちてきた。とっていいのかは知らん。

んでGDインストール
できたっぽい。

そこで再度cpanmを試みるも、

[@ GD-2.46]$sudo cpanm GD::Graph
--> Working on GD::Graph
Fetching http://search.cpan.org/CPAN/authors/id/B/BW/BWARFIELD/GDGraph-1.44.tar.gz ... OK
Configuring GDGraph-1.44 ... OK
==> Found dependencies: GD::Text
--> Working on GD::Text
Fetching http://search.cpan.org/CPAN/authors/id/M/MV/MVERB/GDTextUtil-0.86.tar.gz ... OK
Configuring GDTextUtil-0.86 ... OK
Building and testing GDTextUtil-0.86 ... FAIL
! Installing GD::Text failed. See /Users/shohei/.cpanm/build.log for details.
! Bailing out the installation for GDGraph-1.44. Retry with --prompt or --force.

/Users/<homeuser>/.cpanm/build.log ファイルの中身


Test Summary Report$
 90 -------------------$
 91 t/align.t (Wstat: 512 Tests: 0 Failed: 0)$
 92   Non-zero exit status: 2$
 93   Parse errors: Bad plan.  You planned 21 tests but ran 0.$
 94 t/text.t (Wstat: 512 Tests: 0 Failed: 0)$
 95   Non-zero exit status: 2$
 96   Parse errors: Bad plan.  You planned 20 tests but ran 0.$
 97 t/wrap.t (Wstat: 512 Tests: 0 Failed: 0)$
 98   Non-zero exit status: 2$
 99   Parse errors: Bad plan.  You planned 14 tests but ran 0.$
100 Files=3, Tests=0,  0 wallclock secs ( 0.03 usr  0.01 sys +  0.17 cusr  0    .03 csys =  0.24 CPU)$
101 Result: FAIL$
102 Failed 3/3 test programs. 0/0 subtests failed.$
103 make: *** [test_dynamic] Error 2$
104 -> FAIL Installing GD::Text failed. See /Users/shohei/.cpanm/build.log f    or details.$
105 -> FAIL Bailing out the installation for GDGraph-1.44. Retry with --prom    pt or --force.$

まとめると、自分の環境では、
・cpanmを使う(CPANシェルがNet::FTPで引っかかる)
GDは知らん

ということで。

OSX lionにperlbrewをインストール

http://blog.kiftwi.net/2011/08/02/os-x-lion%E3%81%ABperlbrew%E3%81%A8cpanm%E3%82%92%E3%82%A4%E3%83%B3%E3%82%B9%E3%83%88%E3%83%BC%E3%83%AB%E3%81%97%E3%81%A6perl%E7%92%B0%E5%A2%83%E3%82%92%E6%95%B4%E3%81%88%E3%82%8B/

CPANが使えない件、バグかも。
http://www.gossamer-threads.com/lists/perl/porters/260169

/System/Library/Perl/5.12/Net/FTP/dataconn.pm
line 54. あたりを編集

sub _close 
{ 
my $data = shift; 
my $ftp = ${*$data}{'net_ftp_cmd'}; 

$data->SUPER::close(); 

return if !defined $ftp;   #Here is added code

delete ${*$ftp}{'net_ftp_dataconn'} 
if exists ${*$ftp}{'net_ftp_dataconn'} && 
$data == ${*$ftp}{'net_ftp_dataconn'}; 
} 

sub close 
{ 
my $data = shift; 
my $ftp = ${*$data}{'net_ftp_cmd'}; 

if(exists ${*$data}{'net_ftp_bytesread'} && !${*$data}{'net_ftp_eof'}) { 
my $junk; 
$data->read($junk,1,0); 
return $data->abort unless ${*$data}{'net_ftp_eof'}; 
} 

$data->_close; 

return if !defined $ftp; #Here is added code

$ftp->response() == CMD_OK && 
$ftp->message =~ /unique file name:\s*(\S*)\s*\)/ && 
(${*$ftp}{'net_ftp_unique'} = $1); 
$ftp->status == CMD_OK; 
}