市來健吾の日記

プログラマ、(元)物理屋(ナノテク、流体)

ここの contents を blosxom に変換する script が、 とりあえず結果を出す所まで来た


  • ので、試験も兼ねて 2003 年 12 月 分を 置いてみる

  • 1/6/2004, 12/22/2003, 12/6/2003, 12/3/2003

  • 2/2/2007: html と blosxom の運用について思案中

  • 1/1/2008: diary2hatena を思案中。

  • 付記(7/13/2008): script は一応ここに置いてあるけれど、 はてなスーパーpre記法(しつこいか)でコードを載っけるのもいいかな、と思って、 ここに張り付けておく(最新版):

    #!/usr/local/bin/perl
    # converter from my diary.html to blosxom
    # Copyright (C) 2004-2008 Kengo Ichiki <kengoichiki@yahoo.com>
    # $Id: diary2blosxom.pl,v 1.21 2008/07/13 15:41:53 ichiki Exp $
    use HTML::Parser;
    use Jcode;
    
    $topurl  = "http://kichiki.web.fc2.com/cgi-bin/blosxom.cgi/diary";
    $flavour = "html";
    
    @list0 = ; # old blosxom entry files
    
    # globals for check_category_in_link
    $check_link_tag = '';
    $check_link_li = 0;
    $check_link_text = '';
    
    # check for command-line argument
    die "Usage: diary2blosxom.pl (HTML file)\n" unless @ARGV == 1;
    
    # get the command-line argument
    my $file = shift;
    my $path, $file0;
    
    if ($file =~ /^diary\d{4}\_\d{2}.shtml$/) {
        $path = '';
    }
    elsif ($file =~ /(.*\/)(diary\d{4}\_\d{2}.shtml)$/) {
        $path = $1;
        $file0 = $2;
    }
    else {
        print "Illeagal diary file!\n";
        exit;
    }
    #print "PATH = $path\n";
    
    # Create HTML Parser object
    my $p = HTML::Parser->new
        (api_version => 3,
         start_h => [\&amp;start, "tagname,attr"],
         text_h  => [\&amp;text,  "dtext"],
         end_h   => [\&amp;end,   "tagname"],
         );
    
    die "File $file does't exist.\n" unless -e $file;
    open (HTML, "< $file") or die "Cannot open $file";
    
    $h1flag  = 0; # h1 tag
    $h2flag  = 0; # h2 tag
    $preflag = 0; # pre tag
    $nsaflag = 0; # not-shown anchor tag flag -- local copy, no-href tags
    $ullevel = 0; # ul level
    
    $year  = 0;
    $month = 0;
    $day   = 0;
    $num   = 1;
    
    $title = "";
    $body  = "";
    
    $subn    = 0;
    $subday  = 0;
    $subname = "";
    @subfile;
    @subtitle;
    @subbody;
    @subullevel;
    
    while (<HTML>) {
        # convert input-line into utf8
      Jcode::convert ( \$_, "utf8");
        $p->parse($_);
    }
    close (HTML);
    
    foreach $file (@list0) {
        print "removed: $file\n";
        unlink ($file);
    }
    
    exit;
    
    
    sub start
    {
        my ($tagname, $attr) = @_;
        my $i;
        my $a, $c, $tmptag;
        my $link_category;
    
        # h1 tag
        if ($tagname eq "h1") {
    	$h1flag = 1;
        }
    
        # h2 tag
        elsif ($tagname eq "h2") {
    	$h2flag = 1;
        }
    
        # a tag
        elsif ($tagname eq "a") {
    	if ($ullevel == 0 &amp;&amp; $h2flag == 1) {
    	    $_ = $attr->{id};
    	    if (/d(\d)/) {
    		$day = $1;
    		$num   = 1; # reset counter
    	    }
    	}
    
    	if ($ullevel >= 1) {
    	    if ($attr->{href} eq "") {
    		# do nothing
    		$nsaflag = 1;
    	    }
    	    else {
    		$_ = $attr->{href};
    		$_ =~ s/^images\/AMAZON/http:\/\/images.amazon.com\/images\/P/;
    		$_ =~ s/^theorems.html/http:\/\/kichiki.web.fc2.com\/diary\/theorems.html/;
    		$_ =~ s/^summaries.html/http:\/\/kichiki.web.fc2.com\/diary\/summaries.html/;
    		$_ =~ s/^images\//http:\/\/kichiki.web.fc2.com\/diary\/images\//;
    		if (/^\#(.)/) {
    		    $_ = sprintf ("diary%4d_%02d.shtml\#%s",
    				  $year, $month, $1);
    		}
    
    		if (/^diary(\d{4})\_(\d{2}).shtml\#d(\d)-(.)/) {
    		    $link_category = check_category_in_link ($_);
    		    if ($link_category eq "") {
    			$_ = sprintf ("$topurl/%4d%02d%02d-%s.$flavour",
    				      $1, $2, $3, $4);
    		    }
    		    else {
    			$_ = sprintf
    			    ("$topurl/$link_category/%4d%02d%02d-%s.$flavour",
    			     $1, $2, $3, $4);
    		    }
    		}
    		elsif (/^diary(\d{4})\_(\d{2}).shtml\#d(\d)/) {
    		    $_ = "$topurl/$1/$2/$3";
    		}
    		elsif (/^diary(\d{4})\_(\d{2}).shtml/) {
    		    $_ = "$topurl/$1/$2";
    		}
    
    		if ($_ eq "") {
    		    # do nothing
    		    $nsaflag = 1;
    		}
    		elsif (/^LOCAL\//) {
    		    # do nothing
    		    $nsaflag = 1;
    		}
    		else {
    		    $nsaflag = 0; # to show (for sure)
    		    if ($subn == 0) {
    			if ($ullevel == 1) {
    			    $title .= "<a href=\"$_\">";
    			}
    			else {
    			    $body .= "<a href=\"$_\">";
    			}
    		    }
    		    else {
    			if ($subullevel[$subn-1] == $ullevel) {
    			    $subtitle[$subn-1] .= "<a href=\"$_\">";
    			}
    			else {
    			    $subbody[$subn-1] .= "<a href=\"$_\">";
    			}
    		    }
    		}
    	    }
    
    	    if ($attr->{id} ne "") {
    		$_ = $attr->{id};
    		if (/d(\d)-(.)/) {
    		    $subn ++;
    		    $subday  = $1;
    		    $subname = $2;
    		    if ($subday != $day) {
    			print "WRONG!! id= ",$attr->{id},"\n";
    		    }
    		    $subfile [$subn-1]
    			= sprintf ("%4d%02d%02d-%s",
    				   $year, $month, $day, $subname);
    		    $subullevel [$subn-1] = $ullevel;
    		    $subtitle [$subn-1] = "";
    		    $subbody  [$subn-1] = "";
    		}
    	    }
    	}
        }
    
        # ul tag
        elsif ($tagname eq "ul") {
    	if ($ullevel >= 1) {
    	    if ($subn == 0) {
    		$body .= "\n";
    		for ($i = 1; $i < $ullevel; $i ++) {
    		    $body .= " ";
    		}
    		$body .= "<ul>\n";
    	    }
    	    else {
    		$subbody [$subn-1] .= "\n";
    		for ($i = 1; $i < ($ullevel - $subullevel[$subn-1]); $i ++) {
    		    $subbody [$subn-1] .= " ";
    		}
    		$subbody [$subn-1] .= "<ul>\n";
    	    }
    	}
    
    	$ullevel ++;
        }
    
        # li tag
        elsif ($tagname eq "li") {
    	if ($subn == 0) {
    	    if ($ullevel == 1) {
    		# start of the entry
    		$title = "";
    		$body  = "";
    	    }
    	    elsif ($ullevel > 1) {
    		for ($i = 2; $i < $ullevel; $i ++) {
    		    $body .= " ";
    		}
    		$body .= "<li>\n";
    	    }
    	}
    	else {
    	    for ($i = 2; $i < ($ullevel - $subullevel[$subn-1]); $i ++) {
    		$subbody [$subn-1] .= " ";
    	    }
    	    $subbody [$subn-1] .= "<li>\n";
    	}
        }
    
        # pre tag
        elsif ($tagname eq "pre") {
    	$preflag = 1;
    	if ($subn == 0) {
    	    if ($ullevel == 1) {
    		$title .= "<pre>";
    	    }
    	    elsif ($ullevel > 1) {
    		$body .= "<pre>";
    	    }
    	}
    	else {
    	    if ($ullevel == $subullevel[$subn-1]) {
    		$subtitle[$subn-1] .= "<pre>";
    	    }
    	    elsif ($ullevel > $subullevel[$subn-1]) {
    		$subbody[$subn-1] .= "<pre>";
    	    }
    	}
        }
    
        # other tags
        else {
    	if ($ullevel >= 1) {
    	    $tmptag = "<$tagname";
    	    while (($a, $c) = each (%{$attr})) {
    		# for amazon images
    		if (($tagname eq "img")
    		    and ($a eq "src")) {
    		    if ($c =~ /^images\/AMAZON/) {
    			$c =~ s/^images\/AMAZON/http:\/\/images.amazon.com\/images\/P/;
    		    }
    		    elsif ($c =~ /^images/) {
    			$c =~ s/^images\//http:\/\/kichiki.web.fc2.com\/diary\/images\//;
    		    }
    		}
    
    		if ($a ne "/") {
    		    $tmptag .= " $a=\"$c\"";
    		}
    	    }
    	    if (($tagname eq "img")
    		or ($tagname eq "hr")
    		or ($tagname eq "br")) {
    		$tmptag .= " />";
    	    }
    	    else {
    		$tmptag .= ">";
    	    }
    
    	    if ($subn == 0) {
    		if ($ullevel == 1) {
    		    $title .= $tmptag;
    		}
    		else {
    		    $body .= $tmptag;
    		}
    	    }
    	    else {
    		if ($subullevel[$subn-1] == $ullevel) {
    		    $subtitle[$subn-1] .= $tmptag;
    		}
    		else {
    		    $subbody[$subn-1] .= $tmptag;
    		}
    	    }
    	}
        }
    }
    
    sub text
    {
        my ($dtext) = shift;
    
        $dtext =~ s/&amp;/&amp;/g;
        if ($preflag != 1) {
    	$dtext =~ s// /g;
    	$dtext =~ s/^\n//g;
        }
        else {
    	$dtext =~ s/</</g;
    	$dtext =~ s/>/>/g;
    	$dtext =~ s/"/"/g;
        }
    
        if ($h1flag == 1) {
    	#print "dtext = $dtext\n";
    	$_ = $dtext;
    	if (/(\d)(\d)/) {
    	    $year  = $1;
    	    $month = $2;
    	    #print "year = $year\nmonth = $month\n";
    
    	    @list0 = getentries (".", $year, $month);
    	}
        }
    
        if ($nsaflag == 0) {
    	if ($subn == 0) {
    	    if ($ullevel == 1) {
    		$title .= $dtext;
    	    }
    	    elsif ($ullevel > 1) {
    		$body .= $dtext;
    	    }
    	}
    	elsif ($subullevel[$subn-1] == $ullevel) {
    	    $subtitle[$subn-1] .= $dtext;
    	}
    	else {
    	    $subbody[$subn-1] .= $dtext;
    	}
        }
    }
    
    sub end
    {
        my ($tagname) = shift;
        my $i;
        my $blosxomfile;
        my $category;
        my $localtitle;
    
        # h1 tag
        if ($tagname eq "h1") {
    	$h1flag = 0;
        }
    
        # h2 tag
        elsif ($tagname eq "h2") {
    	$h2flag = 0;
        }
    
        # ul tag
        elsif ($tagname eq "ul") {
    	$ullevel --;
    	if ($subn == 0) {
    	    if ($ullevel >= 1) {
    		$body .= "\n";
    		for ($i = 1; $i < $ullevel; $i ++) {
    		    $body .= " ";
    		}
    		$body .= "</ul>\n";
    	    }
    	}
    	else {
    	    $subbody[$subn-1] .= "\n";
    	    for ($i = 1; $i < ($ullevel-$subullevel[$subn-1]); $i ++) {
    		$subbody[$subn-1] .= " ";
    	    }
    	    $subbody[$subn-1] .= "</ul>\n";
    	}
        }
    
        # a tag
        elsif ($tagname eq "a") {
    	if ($ullevel >= 1 &amp;&amp; $nsaflag == 0) {
    	    if ($subn == 0) {
    		if ($ullevel == 1) {
    		    $title .= "</a>";
    		}
    		elsif ($ullevel > 1) {
    		    $body .= "</a>";
    		}
    	    }
    	    else {
    		if ($ullevel == $subullevel[$subn-1]) {
    		    $subtitle[$subn-1] .= "</a>";
    		}
    		elsif ($ullevel > $subullevel[$subn-1]) {
    		    $subbody[$subn-1] .= "</a>";
    		}
    	    }
    	}
    	else {
    	    $nsaflag = 0;
    	}
        }
    
        # li tag
        elsif ($tagname eq "li") {
    	if ($subn == 0) {
    	    if ($ullevel == 1) {
    		$blosxomfile = sprintf ("%4d%02d%02d%02d",
    					$year, $month, $day, $num);
    
    		# end of the entry
    		print_entry ($blosxomfile,
    			     $title,
    			     $body,
    			     $year, $month, $day, $num);
    
    		# for sure
    		$num ++;
    		$title = "";
    		$body  = "";
    	    }
    	    elsif ($ullevel > 1) {
    		$body .= "\n";
    		for ($i = 1; $i < $ullevel; $i ++) {
    		    $body .= " ";
    		}
    		$body .= "</li>\n";
    	    }
    	}
    	else {
    	    if ($ullevel == $subullevel[$subn-1]) {
    		# end of the entry
    		print_entry ($subfile[$subn-1],
    			     $subtitle[$subn-1],
    			     $subbody[$subn-1],
    			     $year, $month, $day, $num);
    
    		# make a link
    		# categorize
    		#$category = categorize ($subtitle[$subn-1]);
    		($category, $subtitle[$subn-1])
    		    = categorize ($subtitle[$subn-1]);
    
    		# remove anchor tag in the tiltke
    		$localtitle = $subtitle[$subn-1];
    		$localtitle =~ s/<a .?>//g;
    		$localtitle =~ s/<\/a>//g;
    		if ($subn == 1) {
    		    if ($category eq "") {
    			$body .=
    			    "<a href=\"$topurl/$subfile[$subn-1].$flavour\">$localtitle</a>";
    		    }
    		    else {
    			$body .=
    			    "<a href=\"$topurl/$category/$subfile[$subn-1].$flavour\">$localtitle</a>";
    		    }
    		}
    		else {
    		    if ($category eq "") {
    			$subbody [$subn - 2] .=
    			    "<a href=\"$topurl/$subfile[$subn-1].$flavour\">$localtitle</a>";
    		    }
    		    else {
    			$subbody [$subn - 2] .=
    			    "<a href=\"$topurl/$category/$subfile[$subn-1].$flavour\">$localtitle</a>";
    		    }
    		}
    		# for sure
    		$num ++;
    		$subfile  [$subn-1] = "";
    		$subtitle [$subn-1] = "";
    		$subbody  [$subn-1] = "";
    		$subullevel[$subn-1] = 0;
    
    		$subn --;
    	    }
    	    else {
    		$subbody[$subn-1] .= "\n";
    		for ($i = 1; $i < ($ullevel-$subullevel[$sun-1]); $i ++) {
    		    $subbody[$subn-1] .= " ";
    		}
    		$subbody[$subn-1] .= "</li>\n";
    	    }
    	}
        }
    
        # pre tag
        elsif ($tagname eq "pre") {
    	$preflag = 0;
    	if ($subn == 0) {
    	    if ($ullevel == 1) {
    		$title .= "</pre>";
    	    }
    	    elsif ($ullevel > 1) {
    		$body .= "</pre>";
    	    }
    	}
    	else {
    	    if ($ullevel == $subullevel[$subn-1]) {
    		$subtitle[$subn-1] .= "</pre>";
    	    }
    	    elsif ($ullevel > $subullevel[$subn-1]) {
    		$subbody[$subn-1] .= "</pre>";
    	    }
    	}
        }
    
        # other tags
        else {
    	if ($ullevel >= 1) {
    	    if ($subn == 0) {
    		if ($ullevel == 1) {
    		    $title .= "</$tagname>";
    		}
    		else {
    		    $body .= "</$tagname>";
    		}
    	    }
    	    else {
    		if ($ullevel == $subullevel[$subn-1]) {
    		    $subtitle[$subn-1] .= "</$tagname>";
    		}
    		else {
    		    $subbody[$subn-1] .= "</$tagname>";
    		}
    	    }
    	}
        }
    }
    
    
    sub print_entry {
        my ($f, # file
    	$t, # title
    	$b, # body
    	$y, # year (4 digits)
    	$m, # month (1 - 12)
    	$d, # day   (1 - 31)
    	$n  # number (1 - )
    	) = @_;
    
        my $category;
        my @txtmon = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
    		  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "13");
        my $filename = '';
    
        $t =~ s/\n//g;
        $t =~ s/^ +//g;
        $t =~ s/ +$//g;
        $t =~ s/  / /g;
        $t =~ s/\Q\E//g;
        if ($t eq "") {
    	$t = "-";
        }
    
        $b =~ s/\Q\E//g;
    
        # categorize
        #$category = categorize ($t);
        ($category, $t) = categorize ($t);
    
        if ($category ne "") {
    	unless (-e $category) {
    	    mkdir "$category", 0755
    		or die "Cannot make dir $category\n";
    	}
    	$filename = "./$category/$f.txt";
        }
        else {
    	$filename = "./$f.txt";
        }
    
        open (FH, "> tmp.txt");
        print (FH "$t\n");
        printf (FH "meta-creation_date: %s %d, %d 00:%02d\n",
    	    $txtmon[int($m-1)], $d, $y, $n);
        print (FH "$b\n");
        close (FH);
    
        my $flag_match = 0;
        my $count = 0;
        foreach my $file (@list0) {
    	if ($file eq $filename) {
    	    $flag_match = 1;
    	    if (!compare_files ("tmp.txt", $file)) {
    		# modified
    		print ("updated: $filename\n");
    		rename ("tmp.txt", $filename);
    	    }
    	    else {
    		# same
    		unlink ("tmp.txt");
    	    }
    	    # remove the file from @list0
    	    splice (@list0, $count, 1);
    
    	    return;
    	}
    	$count ++;
        }
    
        if ($flag_match == 0) {
    	# new entry
    	print ("new:     $filename\n");
    	rename ("tmp.txt", $filename);
        }
    }
    
    
    # "getfiles" is borrowed from
    # Blosxom Plugin: recentwritebacks_tree
    # Author(s): typester <typester@unknownplace.org>
    # Version: 1.0
    # Blosxom Home/Docs/Licensing: http://www.blosxom.com/
    sub getfiles {
        my $dir = $_[0];
        my @files = ;
        my @ret = ;
    
        $dir .= "/" if ($dir =~ /[^\/]$/);
        if (opendir (DIR, $dir)) {
    	@files = readdir (DIR);
    	closedir (DIR);
        }
    
        foreach my $file (@files) {
    	next if ($file eq '.' or $file eq '..');
    	if (-d "$dir$file" and $file ne '') {
    	    my @subdir = &amp;getfiles("$dir$file");
    	    @ret = (@ret, @subdir);
    	}
    	else {
    	    push (@ret, "$dir$file");
    	}
        }
    
        return @ret;
    }
    
    sub getentries {
        my ($dir, $year, $month) = @_;
        my @ret = ;
    
        my @files = getfiles ($dir);
        my $str_ym = sprintf ("%4d%02d", $year, $month);
    
        foreach my $file (@files) {
    	if ($file =~ /$str_ym.\.txt/) {
    	    push (@ret, "$file");
    	}
        }
    
        return @ret;
    }
    
    # return true when two files are same
    sub compare_files {
        my ($f1, $f2) = @_;
        my $l1 = '';
        my $l2 = '';
        my $ret = 1; # true
    
        my $s1 = -s $f1;
        my $s2 = -s $f2;
        if ($s1 != $s2) {
    	$ret = 0; # false
        }
        else {
    	open (F1, "< $f1");
    	open (F2, "< $f2");
    
          COMPARE: while ($l1 = <F1> and
    		      $l2 = <F2>) {
    	  if ($l1 ne $l2) {
    	      $ret = 0; # false
    	      last COMPARE;
    	  }
          }
    	close (F1);
    	close (F2);
        }
    
        return $ret;
    }
    
    
    # check link
    sub check_category_in_link {
        my $link = shift;
        my ($year, $month, $day);
        my $link_parser;
        my $category;
        # globals: $path, $check_link_tag, $check_link_li, $check_link_text.
    
        #print "CHECK : ".$link." => ";
        $category = '';
        if ($link =~ /^diary(\d{4})\_(\d{2}).shtml\#d(\d)-(.)/) {
    	$year  = $1;
    	$month = $2;
    	$day   = $3;
    	$check_link_tag = "d$3-$4";
    	#print $check_link_tag." => ";
    
    	$file = sprintf ("diary%4d_%02d.shtml", $1, $2);
    	#print $file." => ";
    	open (LINK, "< $path$file") or return $category;
    
    	$link_parser = HTML::Parser->new
    	    (api_version => 3,
    	     start_h => [\&amp;check_link_start, "tagname,attr"],
    	     text_h  => [\&amp;check_link_text,  "dtext"],
    	     end_h   => [\&amp;check_link_end,   "tagname"],
    	     );
    	$check_link_li = 0;
    	$check_link_text = '';
    	while (<LINK>) {
    	    # convert input-line into utf8
    	    Jcode::convert ( \$_, "utf8");
    	      $link_parser->parse($_);
    	  }
    	close (LINK);
    
    	#$category = categorize ($check_link_text);
    	($category, $check_link_text) = categorize ($check_link_text);
        }
        #print $category."\n";
    
        return $category;
    }
    
    sub check_link_start
    {
        my ($tagname, $attr) = @_;
        # $check_link_li and $check_link_tag are global.
    
        # a tag
        if ($tagname eq "a") {
    	if ($attr->{id} eq $check_link_tag) {
    	    $check_link_li = 1;
    	}
        }
    }
    
    sub check_link_text
    {
        my ($dtext) = shift;
        # $check_link_li and $check_link_text are global.
    
        $dtext =~ s/&amp;/&amp;/g;
        $dtext =~ s// /g;
        $dtext =~ s/^\n//g;
    
        if ($check_link_li == 1) {
    	$check_link_text .= $dtext;
        }
    }
    
    sub check_link_end
    {
        my ($tagname) = shift;
        # $check_link_li is global.
    
        # li tag
        if ($tagname eq "li" and $check_link_li == 1) {
    	$check_link_li = 0;
        }
    }
    
    
    # check category : news, phys, job, blog, FreeBSD
    sub categorize {
        my $t = shift;
        my $category = '';
    
        #print "CATEGORY: ".$t." => ";
        $t =~ s/^ +//g;
        $t =~ s/^\n//g;
        if ($t =~ /^\Q[\Enews\Q]\E/) {
            $category = "news";
    	$t =~ s/\Q[\Enews\Q]\E//;
        }
        elsif ($t =~ /^\Q[\Ejob\Q]\E/) {
            $category = "job";
    	$t =~ s/\Q[\Ejob\Q]\E//;
        }
        elsif ($t =~ /^\Q[\Ephys\Q]\E/) {
            $category = "phys";
    	$t =~ s/\Q[\Ephys\Q]\E//;
        }
        elsif ($t =~ /^\Q[\Eblog\Q]\E/) {
            $category = "blog";
    	$t =~ s/\Q[\Eblog\Q]\E//;
        }
        elsif ($t =~ /^\Q[\EFreeBSD\Q]\E/) {
            $category = "FreeBSD";
    	$t =~ s/\Q[\EFreeBSD\Q]\E//;
        }
        elsif ($t =~ /^\Q[\Een\Q]\E/) {
            $category = "en";
    	$t =~ s/\Q[\Een\Q]\E//;
        }
        elsif ($t =~ /^\Q[\ESF\Q]\E/) {
            $category = "SF";
    	$t =~ s/\Q[\ESF\Q]\E//;
        }
        elsif ($t =~ /^\Q[\Emonton\Q]\E/) {
            $category = "monton";
    	$t =~ s/\Q[\Emonton\Q]\E//;
        }
        #elsif ($t =~ /^\Q[\E.+\Q]\E/) {
        #    $category = "phys";
        #}
        #print $category."\n";
    
        #return $category;
        return ($category, $t);
    }
    

    cf. 7/12/2008: はてなの名前付きタグの件。


  • 5/1/2010: blog 復活(で、結構、手間取ったよ)。