2011-10-06
opencvのインストールログ
OpenCV, C++, Python, 備忘録 | |
![]()
環境
- Ubuntu 10.04 64ビット
- python 2.6.5
インストールログ
基本的に
InstallGuide : Debian - OpenCV Wiki
に書いてあるとおり実行しました。
一部インストールするパッケージを変更しています。
IPPやTBBはインストールしていません。pythonラッパーはインストールしました。
# Install the build prerequisites, one aptitude call per package group.
sudo aptitude -y install build-essential
sudo aptitude -y install cmake
sudo aptitude -y install pkg-config
sudo aptitude -y install libpng12-0 libpng12-dev libpng++-dev libpng3
sudo aptitude -y install libpnglite-dev libpngwriter0-dev libpngwriter0c2
sudo aptitude -y install zlib1g-dbg zlib1g zlib1g-dev
sudo aptitude -y install libjasper-dev libjasper-runtime libjasper1
sudo aptitude -y install pngtools libtiff4-dev libtiff4 libtiffxx0c2 libtiff-tools
sudo aptitude -y install libjpeg62 libjpeg62-dev libjpeg62-dbg libjpeg-progs
sudo aptitude -y install ffmpeg libavcodec-dev libavcodec52 libavformat52 libavformat-dev
sudo aptitude -y install libgstreamer0.10-0-dbg libgstreamer0.10-0 libgstreamer0.10-dev
sudo aptitude -y install libxine1-ffmpeg libxine-dev libxine1-bin
sudo aptitude -y install libunicap2 libunicap2-dev
sudo aptitude -y install libdc1394-22-dev libdc1394-22 libdc1394-utils
sudo aptitude -y install swig
sudo aptitude -y install libv4l-0 libv4l-dev
sudo aptitude -y install python-numpy
sudo aptitude -y install libpython2.6 python-dev python2.6-dev
sudo aptitude -y install libjpeg-progs libjpeg-dev
sudo aptitude -y install libgstreamer-plugins-base0.10-dev

# Check out the OpenCV trunk into ./ocv.
mkdir ocv
cd ocv/
# Left commented out in the original log (presumably only needed when svn is missing):
# sudo aptitude install subversion
svn co https://code.ros.org/svn/opencv/trunk

# Out-of-tree release build with the Python wrapper and the examples enabled.
cd trunk/opencv/
mkdir release
cd release/
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D BUILD_PYTHON_SUPPORT=ON -D BUILD_EXAMPLES=ON ..
make
sudo make install
sudo ldconfig

# Print the linker flags pkg-config reports for the installed library.
cd unix-install
pkg-config opencv --libs
動作チェック
C++でのチェック
ダウンロードしたファイルのtrunk/opencv/samples/cpp/ディレクトリのファイルがコンパイルできるかどうか。
コンパイルは、
g++ example.cc `pkg-config opencv --cflags --libs`
など。
pythonラッパーのチェック
pythonでimport cvが成功するかどうか。
ダウンロードしたファイルのtrunk/opencv/samples/python/delaunay.pyがpythonで実行できるかどうか。
参考
2011-09-14
見出し語化の高速化
nltkのWordNetLemmatizerを力ずくで高速化した。
環境
Python 2.6.5
コード
# -*- coding: utf-8 -*-
"""Speed up NLTK's WordNet lemmatization with a precomputed lookup table.

At import time every inflected form that can be derived from WordNet's
exception lists and suffix-substitution rules is generated and mapped to the
lemma the WordNet lemmatizer returns for it.  ``lemmatize_with_dict`` then
answers with a plain dictionary lookup.
"""
from collections import defaultdict

import nltk
from nltk.corpus import wordnet as _wordnet

_STEMMER = nltk.PorterStemmer().stem
# One shared bound method instead of a new WordNetLemmatizer per call: the
# table construction below calls it once per generated form.
_LEMMATIZE = nltk.WordNetLemmatizer().lemmatize
# Order in which a part of speech is picked when detection gives no usable answer.
_LEMMATIZATION_POS_PRIORITY = (_wordnet.NOUN, _wordnet.VERB,
                               _wordnet.ADJ, _wordnet.ADV)
_POS_LIST = (_wordnet.ADJ, _wordnet.ADV, _wordnet.NOUN, _wordnet.VERB)


def stem_form(form):
    """Return the Porter stem of *form*."""
    return _STEMMER(form)


def _detect_pos(form):
    """Guess the part of speech of *form* from its WordNet synsets.

    Returns None when WordNet has no synset for the form.  The first synset
    whose name (minus its 5-character ``.p.nn`` suffix) stems to the same
    stem as *form* wins; otherwise the first synset is used.  The satellite
    adjective tag is reported as a plain adjective.
    """
    form = form.replace(' ', '_')
    synsets = _wordnet.synsets(form)
    if not synsets:
        return None
    pos = None
    stem = stem_form(form)
    for synset in synsets:
        if stem_form(synset.name[:-5]) == stem:
            pos = synset.pos
            break
    if pos is None:
        pos = synsets[0].pos
    if pos == _wordnet.ADJ_SAT:
        pos = _wordnet.ADJ
    return pos


def lemmatize_with_wordnet(form, pos=None):
    """Lemmatize *form* with NLTK's WordNetLemmatizer.

    When *pos* is None it is detected with ``_detect_pos``; if detection
    fails, *form* is returned unchanged.
    """
    if pos is None:
        pos = _detect_pos(form)
    if not pos:
        return form
    assert(pos in _POS_LIST)
    return _LEMMATIZE(form, pos=pos)


def _lemmatize_form_with_wordnet(form, pos_set):
    """Lemmatize *form*, restricting the part of speech to *pos_set*.

    A single candidate is used directly.  Otherwise the detected part of
    speech is used if it is one of the candidates, falling back to the first
    candidate found in ``_LEMMATIZATION_POS_PRIORITY``.
    """
    assert(pos_set)
    if len(pos_set) == 1:
        target_pos = pos_set.copy().pop()
    else:
        target_pos = _detect_pos(form)
        if not target_pos or target_pos not in pos_set:
            for pos in _LEMMATIZATION_POS_PRIORITY:
                if pos in pos_set:
                    target_pos = pos
                    break
    assert(target_pos in _POS_LIST)
    return _LEMMATIZE(form, pos=target_pos)


def _construct_inflected_form_to_lemma_dictionary():
    """Build the ``{inflected form: lemma}`` table used by ``lemmatize_with_dict``.

    Candidate forms come from WordNet's exception map and from applying the
    morphological substitution rules in reverse to every lemma name.  Each
    candidate is then lemmatized once and the result stored.
    """
    all_inflected_forms = defaultdict(set)

    # Irregular forms listed in the exception map (satellite adjectives skipped).
    for pos, excepted_forms in _wordnet._exception_map.iteritems():
        if pos == _wordnet.ADJ_SAT:
            continue
        for excepted_form in excepted_forms:
            all_inflected_forms[excepted_form].add(pos)

    # Regular forms: each lemma itself plus every suffix substitution of it.
    for pos in _POS_LIST:
        substitutions = _wordnet.MORPHOLOGICAL_SUBSTITUTIONS[pos]
        for lemma in _wordnet.all_lemma_names(pos=pos):
            lemma = lemma.replace('_', ' ')
            all_inflected_forms[lemma].add(pos)
            form = lemma
            # Nouns ending in 'ful' are inflected before that ending, which
            # is re-attached afterwards.
            if pos == _wordnet.NOUN and form.endswith('ful'):
                suffix = 'ful'
                form = form[:-3]  # len('ful')
            else:
                suffix = ''
            for new_suffix, old_suffix in substitutions:
                if form.endswith(old_suffix) or old_suffix == '':
                    if old_suffix == '':
                        inflected_form = form + new_suffix
                    else:
                        inflected_form = form[:-len(old_suffix)] + new_suffix
                    inflected_form += suffix
                    all_inflected_forms[inflected_form].add(pos)

    inflected_form_to_lemma = {}
    for inflected_form, pos_set in all_inflected_forms.iteritems():
        lemma = _lemmatize_form_with_wordnet(inflected_form, pos_set)
        inflected_form_to_lemma[inflected_form] = lemma.replace('_', ' ')
    return inflected_form_to_lemma


_INFLECTED_FORM_TO_LEMMA = _construct_inflected_form_to_lemma_dictionary()


def lemmatize_with_dict(form):
    """Return the lemma of *form* from the precomputed table.

    The lookup is case-insensitive.  Forms missing from the table are
    returned lower-cased.
    """
    # NOTE(review): keys built from lemma names use spaces, while this lookup
    # turns spaces into underscores, so multi-word forms look like they can
    # never hit the table.  Behaviour kept as is; confirm the intent.
    try:
        return _INFLECTED_FORM_TO_LEMMA[form.lower().replace(' ', '_')]
    except KeyError:
        pass
    return form.lower()


def _test():
    """Print a tab-separated comparison of the three normalizers."""
    test_forms = ('media', 'playing', 'player', 'possesses', 'sung', 'became',
                  'begun', 'fallen', 'men', 'buses', 'initial',
                  'initialization')
    # Parenthesized single-argument prints behave the same on Python 2 and 3.
    print('original\tstemming\twordnet_lemmatizer\tlemmatize_with_dict')
    for form in test_forms:
        print('{0}\t{1}\t{2}\t{3}'.format(form,
                                          stem_form(form),
                                          lemmatize_with_wordnet(form),
                                          lemmatize_with_dict(form)))


if __name__ == '__main__':
    _test()

'''
original        stemming  wordnet_lemmatizer  lemmatize_with_dict
media           media     medium              medium
playing         play      playing             playing
player          player    player              player
possesses       possess   posse               possess
sung            sung      sung                sung
became          becam     become              become
begun           begun     begin               begin
fallen          fallen    fall                fall
men             men       man                 man
buses           buse      bus                 bus
initial         initi     initial             initial
initialization  initi     initialization      initialization
'''
nltkのWordNetLemmatizerの動作は、入力語句に対して以下の手順で見出し語化を行っているようです。
- 入力語句が例外リストに載っていないかどうかチェックする。載っていれば別途処理する。
- 末尾置換ルールを用いて入力語句の末尾を置換する。
- 辞書の見出し語に語句が存在しているかどうかをチェックする。
- 末尾置換ルールが無くなるまで手順2.と3.を繰り返す。
手順1.の例外リストとは、主に不規則変化(sing-sang-sungなど)を解決するための手順のようです。
手順2.と手順3.では、あらかじめ用意された末尾置換ルールを使用します。
例えば入力語句が名詞であり、xesで終わっている場合、末尾からxesを取り除き、代わりにxを追加する。その後、置換した単語が辞書の見出し語にあるかどうかをチェックする。例えば入力語句がboxesであるとき、末尾のxesをxに置換しboxとし、その後boxが辞書の見出し語にあるかどうかチェックする。
ソースコードを読む限り、nltkのWordNetLemmatizerは本家WordNetのC言語実装と同様の手順で見出し語化を行っているらしい。
末尾の置換、検索、を繰り返しているので実行速度が遅い。
そこで、これら例外リストと置換ルールを使用して、WordNetLemmatizerが処理できる語句をすべて生成する。
それらWordNetLemmatizerが処理できる語句をキーとし、それらの見出し語を値とする辞書(_INFLECTED_FORM_TO_LEMMA)を作成した。
{'boxes': 'box', 'box': 'box', ..., 'media': 'medium', 'medium': 'medium', ...}のような活用形がキーであり、見出し語が値である辞書。
実行速度テスト
American National Corpusから抽出した16,814,123個の英単語(296,528種類)に対して、見出し語化にかかる時間を測った。
nltkのWordNetLemmatizerによる見出し語化(lemmatize_with_wordnet)では3583秒(およそ1時間)、
nltkのWordNetLemmatizerによる見出し語化(lemmatize_with_wordnet)の入出力をキャッシュした場合では80秒、
今回作成した辞書を使用した見出し語化(lemmatize_with_dict)では20秒、
となった。結局入出力をキャッシュした場合と大差無い結果になった。
しかし、入出力をキャッシュする場合、WordNetに登録されていない単語や固有名詞などが入力されるたび、キャッシュのサイズが大きくなる。
その点、今回作成した辞書のサイズは固定されているので、メモリ消費量の増加などを気にしなくて良い。
今回作成した辞書にも問題点がある。今回作成した辞書では、入力語句から出力が一意に決まるが、通常は一意には決まらない。例えば、betterの見出し語はwellかgoodかは品詞を用いない限り決めにくい。
2011-08-06
pythonによる文字列の正規化
テキストマイニングなどを行うためには文書、文、単語などの文字列の正規化が重要です。
単語の大文字小文字の統一、半角全角の統一などをする必要があります。
文字列の正規化のために利用しているpythonコードを以下に書いておきます。
今後増える可能性もあります。
実行環境
Ubuntu 10.04 64ビット
python 2.6.5
unicode型に変換する
def unicode_ignore_invalid_char(text):
    """Return *text* as unicode, decoding UTF-8 byte strings leniently.

    Byte strings are decoded as UTF-8 and undecodable bytes are dropped.
    Any other value, including text that is already unicode, is returned
    unchanged.
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+, so behaviour there is
    # unchanged; on Python 3 this decodes bytes instead of calling the
    # non-existent ``str.decode``.
    if isinstance(text, bytes):
        return text.decode('utf-8', 'ignore')
    return text
変換不能な文字列を無視してstr型からunicode型に変換する。
str型に変換する
def str_ignore_invalid_char(text):
    """Return *text* as a UTF-8 byte string, skipping unencodable characters.

    Unicode text is encoded as UTF-8 with the ``'ignore'`` error handler.
    Any other value, including a byte string, is returned unchanged.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, which
    # avoids a NameError on interpreters that have no ``unicode`` builtin.
    if isinstance(text, type(u'')):
        return text.encode('utf-8', 'ignore')
    return text
変換不能な文字列を無視してunicode型からstr型に変換する。
入出力の文字列型を統一する
from functools import wraps


def consistent_texttype(function):
    """Decorator that makes the output string type follow the input string type.

    The first positional argument decides the type: unicode text in gives
    unicode text out, a byte string in gives a byte string out.  For a
    collection, its first element decides, and an empty collection is treated
    as byte strings.  A non-string result is converted element by element and
    returned as a list.

    Raises:
        TypeError: if the wrapped function is called without a positional
            argument.
    """
    text_type = type(u'')  # ``unicode`` on Python 2

    @wraps(function)
    def _consistent_texttype(*args, **kwargs):
        if not args:
            raise TypeError('{0}() needs the text as its first positional '
                            'argument'.format(function.__name__))
        input_text = args[0]
        if isinstance(input_text, text_type):
            is_unicode = True
        elif isinstance(input_text, bytes):
            is_unicode = False
        else:
            # for collections
            try:
                is_unicode = isinstance(input_text[0], text_type)
            except IndexError:
                # Empty collection: nothing to inspect, so do not crash.
                is_unicode = False
        if is_unicode:
            convert = unicode_ignore_invalid_char
        else:
            convert = str_ignore_invalid_char

        output_text = function(*args, **kwargs)
        if isinstance(output_text, (text_type, bytes)):
            return convert(output_text)
        return [convert(item) for item in output_text]
    return _consistent_texttype
入力文字列がstr型であるとき出力文字列もstr型にし、入力文字列がunicode型であるとき出力文字列もunicode型にするデコレータ。
unicodeを正規化する
import unicodedata


@consistent_texttype
def normalize_unicode(text, form='NFKC'):
    """Apply Unicode normalization *form* to *text*.

    *form* must be one of 'NFC', 'NFKC', 'NFD' or 'NFKD'.  The text is
    converted to unicode before normalizing; the decorator converts the
    result back to the caller's string type.
    """
    assert form in ('NFC', 'NFKC', 'NFD', 'NFKD')
    return unicodedata.normalize(form, unicode_ignore_invalid_char(text))
半角カタカナを全角カタカナに変換したりする。
例えば㌻をページ、ﾊﾝｶｸｶﾅをハンカクカナに変換する。
HTMLエンティティを変換する
from BeautifulSoup import BeautifulSoup


@consistent_texttype
def unescape_entities_with_beautifulsoup(htmltext, prettify=False):
    """Parse *htmltext* with BeautifulSoup, converting HTML entities.

    Returns the prettified document when *prettify* is true, otherwise the
    document's ``__repr__()``.
    """
    parsed = BeautifulSoup(htmltext,
                           convertEntities=BeautifulSoup.HTML_ENTITIES)
    return parsed.prettify() if prettify else parsed.__repr__()
BeautifulSoupを利用してHTMLエンティティを変換する。
例えば&amp;gt;を>に変換する。
from BeautifulSoup import BeautifulStoneSoup


@consistent_texttype
def unescape_entities_with_beautifulstonesoup(htmltext, prettify=False):
    """Parse *htmltext* with BeautifulStoneSoup, converting HTML entities.

    Returns the prettified document when *prettify* is true, otherwise the
    document's ``__repr__()``.
    """
    parsed = BeautifulStoneSoup(
        htmltext, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    return parsed.prettify() if prettify else parsed.__repr__()
BeautifulStoneSoupを利用してHTMLエンティティを変換する。
BeautifulSoupを利用した場合と同じかもしれない。
from htmlentitydefs import name2codepoint
import re

# derived from BeautifulSoup
# __author__ = "Leonard Richardson (leonardr@segfault.org)"
# __version__ = "3.1.0.1"
# __copyright__ = "Copyright (c) 2004-2009 Leonard Richardson"
# __license__ = "New-style BSD"

# Matches decimal (&#65;), hexadecimal (&#x41; / &#X41;) and named (&amp;)
# references; group 1 is the part between '&' and ';'.
_ENTITY_PATTERN = re.compile(u'&(#\\d+|#[xX][0-9a-fA-F]+|\\w+);')


def _unescape_entity(match):
    """Return the character for one matched entity reference.

    Unknown names and numeric references that do not map to a valid code
    point are returned unchanged as ``&...;``.
    """
    x = match.group(1)
    if x in name2codepoint:
        return unichr(name2codepoint[x])
    if x.startswith('#'):
        try:
            if x[1:2] in ('x', 'X'):
                return unichr(int(x[2:], 16))
            return unichr(int(x[1:]))
        except (ValueError, OverflowError):
            # Code point outside the range unichr() accepts: keep the
            # reference as written instead of aborting the substitution.
            pass
    return u'&{0};'.format(x)


@consistent_texttype
def unescape_entities(htmltext):
    """Replace HTML entity references in *htmltext* with their characters.

    The text is converted to unicode first; the decorator converts the
    result back to the caller's string type.
    """
    unicode_htmltext = unicode_ignore_invalid_char(htmltext)
    unescaped_text = _ENTITY_PATTERN.sub(_unescape_entity, unicode_htmltext)
    assert(isinstance(unescaped_text, unicode))
    return unescaped_text
BeautifulSoupのHTMLエンティティ変換部分を抽出し、少し変更を加えたもの。
HTMLエンティティ変換のためだけにBeautifulSoupを利用するのは高価すぎると考えるときはこちらを利用する。
BeautifulSoupはNew-style BSDライセンスです。
語幹を抽出する(ステミング)
import nltk


@consistent_texttype
def stem_term(term, porter=True):
    """Return the stem of *term*.

    Uses NLTK's Porter stemmer when *porter* is true, otherwise the
    Lancaster stemmer.
    """
    stemmer_class = nltk.PorterStemmer if porter else nltk.LancasterStemmer
    return stemmer_class().stem(term)
英語用。例えばinitial, initializeをinitiにする。
見出し語化・レンマ化(lemmatization)
import nltk
from nltk.corpus import wordnet


@consistent_texttype
def lemmatize_term(term, pos=None):
    """Lemmatize *term* with NLTK's WordNetLemmatizer.

    When *pos* is None, the part of speech of the first synset WordNet
    returns for *term* is used; if there is no synset, *term* is returned
    unchanged.
    """
    if pos is None:
        synsets = wordnet.synsets(term)
        if not synsets:
            return term
        pos = synsets[0].pos
    # Applied to a caller-supplied pos as well, so a satellite-adjective tag
    # is treated as a plain adjective rather than tripping the check below.
    if pos == wordnet.ADJ_SAT:
        pos = wordnet.ADJ
    assert(pos in (wordnet.NOUN, wordnet.VERB, wordnet.ADJ, wordnet.ADV))
    return nltk.WordNetLemmatizer().lemmatize(term, pos=pos)
英語用。WordNetを用いて単語の見出し語化を行う。
例えばis, areをbeに、potatosをpotatoにする。
品詞(pos)の指定がなければsynsetsのうち、一番最初に現れる品詞を使用する。
小文字にする
text.lower()
もしくは、
import string


def lower_text(text):
    """Return a lower-cased copy of *text*."""
    # The str method replaces the deprecated string.lower(), which no longer
    # exists on Python 3.
    return text.lower()
大文字にする
text.upper()
もしくは、
import string


def upper_text(text):
    """Return an upper-cased copy of *text*."""
    # The str method replaces the deprecated string.upper(), which no longer
    # exists on Python 3.
    return text.upper()
先頭のみ大文字にする
text.capitalize()
もしくは、
import string


def capitalize_text(text):
    """Return *text* with its first character upper-cased and the rest lower-cased."""
    # The str method replaces the deprecated string.capitalize(), which no
    # longer exists on Python 3.
    return text.capitalize()
参考サイト
UnicodeDecodeErrorが発生する文字をignoreオプションで無視する - Pyro Memo
参考文献
- 作者: Steven Bird,Ewan Klein,Edward Loper,萩原正人,中山敬広,水野貴明
- 出版社/メーカー: オライリージャパン
- 発売日: 2010/11/11
- メディア: 大型本
- 購入: 20人 クリック: 639回
- この商品を含むブログ (44件) を見る
2011-07-22
nkf python インターフェースのインストール
環境
- Ubuntu 10.04 32ビット
- python 2.6.5
インストール方法
$ mkdir temp  # create a working directory
$ cd temp
# Download nkf-2.1.1.tar.gz from http://sourceforge.jp/projects/nkf/
temp$ tar zxvf nkf-2.1.1.tar.gz
temp$ cd nkf-2.1.1/
temp/nkf-2.1.1$ wget ftp://city.plala.jp:1221/NkfPython/NKF_python20090602.tgz
temp/nkf-2.1.1$ tar zxvf NKF_python20090602.tgz
temp/nkf-2.1.1$ cd NKF.python/
temp/nkf-2.1.1/NKF.python$ sudo python setup.py install
# If "Python.h: No such file or directory" is displayed, run the following:
# sudo aptitude install python-dev
temp/nkf-2.1.1/NKF.python$ cd ../../../
$ sudo rm -rf temp
使用方法
import nkf

# Option string handed to nkf; presumably '-w' selects UTF-8 output --
# TODO confirm against the nkf manual.
flag = '-w'
# Convert input_text with the given option.  input_text is not defined in
# this snippet; the caller is expected to supply it.
output = nkf.nkf(flag, input_text)
# Ask nkf to guess the encoding of input_text (presumably returns the name
# of the detected encoding -- verify).
input_code = nkf.guess(input_text)
参考リンク
2011-05-28
Pythonにおける並行処理について
Python, プログラミング | |
![]()
気になったのでPythonのGIL(Global Interpreter Lock)が並行処理にどの程度影響するかについて少し実験しました。
はじめに
まず、「並行」処理と「並列」処理という言葉を区別する必要があります。下記参考文献の「並行コンピューティング技法」によると、
システムが複数の動作を同時に実行状態(in progress)に保てる機能を備えている場合を並行(concurrent)と言い、 複数の動作を同時に実行できる場合を並列(parallel)と言います。 重要な概念、違いは「実行状態」という点です。 ...中略... 「並行」は「並列」を含有します。
だそうです。同書によると、1つのCPUコアが2つのスレッドを切り替えながら処理する場合は「並行」処理に含まれるようです。「並列」処理では複数のCPUコアが必須で、複数のスレッドが複数のCPUコアにより同時に実行される事を「並列」処理と言うようです。
目指すべきは「並行」処理ではなく「並列」処理な気がします。
テスト環境
- Intel Core i7-920 Processor (2.66 GHz × 8)
- Memory 9GB
- Ubuntu Lucid 10.04 64bit
- Python 2.6.5
テストコード
1. 逐次処理でのCPU負荷の大きい処理
# sequential_cpu.py def _cpu_bound_work(): i = 0 while i < 100000000: i += 1 if __name__ == '__main__': for _ in xrange(8): _cpu_bound_work()
2. threadingでのCPU負荷の大きい処理
# threading_cpu.py import threading def _cpu_bound_work(): i = 0 while i < 100000000: i += 1 class TestThread(threading.Thread): def run(self): _cpu_bound_work() if __name__ == '__main__': mainthread = threading.currentThread() for _ in xrange(8): thread = TestThread() thread.start() for thread in threading.enumerate(): if mainthread != thread: thread.join()
3. multiprocessingによるCPU負荷の大きい処理
# multiprocessing_cpu.py import multiprocessing def _cpu_bound_work(): i = 0 while i < 100000000: i += 1 class TestProcess(multiprocessing.Process): def run(self): _cpu_bound_work() if __name__ == '__main__': for _ in xrange(8): process = TestProcess() process.start() for process in multiprocessing.active_children(): process.join()
4. 逐次処理によるIO待ちの大きい処理
# sequential_io.py
# Baseline benchmark: eight simulated I/O waits, one after another.
import time


def _io_bound_work():
    """Block for ten seconds in place of a slow I/O call."""
    time.sleep(10.0)  # to simulate i/o bound work


def _main():
    """Run the simulated I/O wait eight times sequentially."""
    for _ in xrange(8):
        _io_bound_work()


if __name__ == '__main__':
    _main()
5. threadingによるIO待ちの大きい処理
# threading_io.py
# Benchmark: eight simulated I/O waits, each in its own thread.
import threading
import time


def _io_bound_work():
    """Block for ten seconds in place of a slow I/O call."""
    time.sleep(10.0)  # to simulate i/o bound work


class TestThread(threading.Thread):
    """Thread whose body is a single simulated I/O wait."""

    def run(self):
        _io_bound_work()


def _main():
    """Start eight worker threads, then wait for all of them."""
    main_thread = threading.currentThread()
    for _ in xrange(8):
        TestThread().start()
    # Snapshot the live threads and join everything except the main thread.
    workers = [t for t in threading.enumerate() if main_thread != t]
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    _main()
6. multiprocessingによるIO待ちの大きい処理
# multiprocessing_io.py
# Benchmark: eight simulated I/O waits, each in its own child process.
import multiprocessing
import time


def _io_bound_work():
    """Block for ten seconds in place of a slow I/O call."""
    time.sleep(10.0)  # to simulate i/o bound work


class TestProcess(multiprocessing.Process):
    """Process whose body is a single simulated I/O wait."""

    def run(self):
        _io_bound_work()


def _main():
    """Start eight child processes, then wait for all of them."""
    for _ in xrange(8):
        TestProcess().start()
    for child in multiprocessing.active_children():
        child.join()


if __name__ == '__main__':
    _main()
テスト結果
1. 逐次処理によるCPU負荷の大きい処理
$ time python sequential_cpu.py
real 0m45.265s
user 0m45.230s
sys 0m0.020s
2. threadingによるCPU負荷の大きい処理
$ time python threading_cpu.py
real 1m8.033s
user 1m7.420s
sys 0m16.930s
3. multiprocessingによるCPU負荷の大きい処理
$ time python multiprocessing_cpu.py
real 0m10.969s
user 1m24.960s
sys 0m0.040s
4. 逐次処理によるIO待ちの大きい処理
$ time python sequential_io.py
real 1m20.095s
user 0m0.010s
sys 0m0.010s
5. threadingによるIO待ちの大きい処理
$ time python threading_io.py
real 0m10.029s
user 0m0.020s
sys 0m0.000s
6. multiprocessingによるIO待ちの大きい処理
$ time python multiprocessing_io.py
real 0m10.035s
user 0m0.020s
sys 0m0.010s
まとめ
1,4の逐次処理が遅いのは当然として、2のthreadingモジュールを使用してCPU負荷の大きい処理を行った場合の実行速度がかなり遅いです。
pythonのGILの影響で、並列処理ができていない事が原因なのでしょう。3のmultiprocessingモジュールを使用した場合はGILを回避できるようです。
並列処理できないthreadingモジュールを使う意味はあるのでしょうか。少なくとも5のように、IO待ち時間が長い処理を複数回行う場合はthreadingモジュールを使用する意味はあるようです。webページのクローラなどには向いているようです。
やはりthreadingモジュールよりもmultiprocessingモジュールを使用した方がいい気がする。(もしくはos.forkを使用するか)
参考
17.2. multiprocessing — Process-based parallelism — Python v3.4.0a0 documentation
Tricorn Labs » Python 2.6 multiprocessing package を触ってみた。 [GIL回避]
並行コンピューティング技法 ―実践マルチコア/マルチスレッドプログラミング
- 作者: Clay Breshears,千住治郎
- 出版社/メーカー: オライリージャパン
- 発売日: 2009/12/21
- メディア: 大型本
- 購入: 12人 クリック: 598回
- この商品を含むブログ (36件) を見る
