入力されたデータの前処理

  • ヌル文字の削除
  • trim(全角スペースを含む)
  • 全角英数字を半角英数字、半角カナを全角カナに変換


app/app_controller.php

<?php
// Usage fragment: declare this property inside the application's controller class.
// Only ONE of the two declarations below is meant to be used.
// Form 1: no settings; the component reads the encoding from Configure 'App.encoding'.
var $components = array( 'DataPrep' );
// or
// Form 2: pass the encoding explicitly through the 'encoding' setting.
var $components = array( 'DataPrep' => array( 'encoding' => 'SJIS-win' ) ); 


app/components/data_prep.php

<?php
/**
 * Pre-processes Controller::data.
 *
 * Every string value has NUL bytes removed, is trimmed (ASCII whitespace and
 * full-width spaces), and is passed through mb_convert_kana() with mode 'KVa'
 * (full-width alphanumerics to half-width, half-width katakana to full-width).
 *
 * Inside a Controller:
 * <code>
 * var $components = array( 'DataPrep' );
 * // or
 * var $components = array( 'DataPrep' => array( 'encoding' => 'SJIS-win' ) );
 * </code>
 * @uses Object
 * @package
 * @version $id$
 * @author oppara <oppara@oppara.tv>
 */
class DataPrepComponent extends Object {

    /**
     * Byte sequence of the full-width space in each supported encoding,
     * keyed by lower-cased encoding name.
     *
     * iso-2022-jp would have to be converted to euc-jp before processing;
     * it is practically never used, so it is left out.
     * @var array
     * @access private
     */
    private static $__mb_spaces = array( 
        'utf-8' => "\xe3\x80\x80",
        'euc-jp' => "\xA1\xA1",
        'eucjp-win' => "\xA1\xA1",
        'shift_jis' => "\x81\x40",
        'sjis-win' => "\x81\x40",
        // 'iso-2022-jp' => "\x1b\x24\x42\x21\x21\x1b\x28\x42",
        'ascii' => "\x20" 
    );

    /**
     * Name of the encoding in use.
     *
     * @var string
     * @access private
     */
    private static $__encoding;

    /**
     * Entry point; per the original author's note this is called before
     * Controller::beforeFilter().
     *
     * Resolves the encoding (the 'encoding' setting, then Configure
     * 'App.encoding', then mb_internal_encoding()) and rewrites
     * $controller->data in place.
     *
     * @param Controller $controller controller whose data property is cleaned
     * @param mixed $settings optional array; only the 'encoding' key is read
     * @return void
     * @access public
     */
    function initialize( &$controller, $settings = array() ) {
        if ( $settings && isset( $settings['encoding'] ) ) {
            self::$__encoding = strtolower( $settings['encoding'] );
        }
        else {
            self::$__encoding = Configure::read( 'App.encoding' );
        }
        // mb_convert_kana() would fall back to the internal encoding anyway when
        // none is given; resolving it here lets the full-width trim use the same one.
        if ( empty( self::$__encoding ) ) {
            self::$__encoding = mb_internal_encoding();
        }
        if ( !empty( $controller->data ) && is_array( $controller->data ) ) {
            array_walk( $controller->data, array( __CLASS__, '__prep' ) );
        }
    }

    /**
     * array_walk() callback: cleans one value in place, recursing into arrays.
     *
     * Declared static because it is registered as array( __CLASS__, '__prep' );
     * calling a non-static method that way is an error on PHP 8.
     *
     * @param mixed $vals value to clean (modified by reference)
     * @param string $key array key supplied by array_walk(); unused
     * @return void
     * @access private
     */
    static function __prep( &$vals, $key ) {
        if ( is_array( $vals ) ) {
            array_walk( $vals, array( __CLASS__, '__prep' ) );
            return;
        }
        // Leave null / int / bool leaves (e.g. the numeric entries of an upload
        // array) untouched instead of coercing them to strings.
        if ( !is_string( $vals ) ) {
            return;
        }
        $vals = str_replace( "\0", '', $vals );
        $vals = trim( $vals );
        self::__mb_trim( $vals );
        $vals = mb_convert_kana( $vals, 'KVa', self::$__encoding );
    }

    /**
     * Strips leading and trailing full-width spaces (and ASCII whitespace).
     *
     * The argument is taken by reference. The previous by-value signature only
     * worked through call_user_func_array() reference forcing, which PHP 5.4
     * removed, so the trimmed result was silently thrown away.
     *
     * NOTE(review): matching is byte-wise. In Shift_JIS a two-byte character
     * whose trail byte is 0x81 followed by '@' at the end of the string looks
     * like a full-width space to the trailing pattern - confirm whether this
     * needs encoding-aware handling.
     *
     * @param string $str string to trim (modified by reference)
     * @return void
     * @access private
     */
    static function __mb_trim( &$str ) {
        $enc = strtolower( (string) self::$__encoding );
        $spaces = self::$__mb_spaces;
        if ( !isset( $spaces[$enc] ) ) {
            $error = "$enc is unaccept. accept ";
            $error.= join( ', ', array_keys( $spaces ) );
            $error.= '.';
            trigger_error( $error, E_USER_ERROR );
            // A user error handler may return; do not index the table with an unknown key.
            return;
        }
        $space = preg_quote( $spaces[$enc], '/' );
        $str = preg_replace( "/^(?:$space|\s)+/", '', $str );
        $str = preg_replace( "/(?:$space|\s)+$/", '', $str );
    }
}
?>

TODO

  • もっといい名前を考える
  • テスト
  • singleton

app/tests/cases/components/data_prep.test.php

<?php
App::import( 'Component', 'DataPrep' );
/**
 * Minimal stand-in for a controller, used only by the DataPrep tests.
 *
 * The properties the tests assign are declared explicitly so that no dynamic
 * properties are created (deprecated since PHP 8.2).
 */
class DataPrepComponentTestController {
    // Request data that the component cleans in place.
    var $data = null;
    // Component instance under test.
    var $DataPrep = null;
}
/**
 * Test case for DataPrepComponent: NUL removal, ASCII trim and full-width
 * space trim in each supported encoding.
 */
class DataPrepTestCase extends CakeTestCase
{
    // Stub controller carrying the data and the component under test.
    var $controller;

    /**
     * Builds a fresh stub controller with a DataPrep component before each test.
     */
    function setup() {
        $this->controller = new DataPrepComponentTestController();
        $this->controller->DataPrep = new DataPrepComponent();
    }

    /**
     * NUL bytes inside a value are removed (and the value is trimmed).
     */
    function testNull() {
        $this->controller->data = array( 'test' => " fo\0o " );
        $this->controller->DataPrep->initialize( $this->controller, null );
        $res = $this->controller->data;
        $this->assertEqual( $res['test'], 'foo' );
    }

    /**
     * Leading/trailing ASCII spaces are removed; inner spaces are kept.
     */
    function testTrim() {
        $this->controller->data = array( 'test' => ' foo ' );
        $this->controller->DataPrep->initialize( $this->controller, null );
        $res = $this->controller->data;
        $this->assertEqual( $res['test'], 'foo' );

        $this->controller->data = array( 
            'test' => ' foo bar ',
            'test2' => ' bar baz '
        );
        $this->controller->DataPrep->initialize( $this->controller, null );
        $res = $this->controller->data;
        $this->assertEqual( $res['test'], 'foo bar' );
        $this->assertEqual( $res['test2'], 'bar baz' );
    }

    /**
     * Full-width spaces (alone, and mixed with ASCII spaces) are trimmed in
     * every supported multibyte encoding.
     */
    function testMbTrim() {
        // expected
        $expected = '申込表';
        // The full-width space (U+3000) is spelled out as UTF-8 bytes and converted
        // to App.encoding, the encoding _testMbTrim() assumes for its inputs, so
        // the fixture cannot silently degrade to an ASCII space.
        $zen = mb_convert_encoding( "\xE3\x80\x80", Configure::read( 'App.encoding' ), 'UTF-8' );
        // full-width space + $expected + full-width space
        $fixture_full = $zen . $expected . $zen;
        // full-width + half-width space + $expected + half-width + full-width space
        $fixture_mix = $zen . ' ' . $expected . ' ' . $zen;

        $encodings = array( 'UTF-8', 'EUC-JP', 'EUCJP-WIN', 'SHIFT_JIS', 'SJIS-WIN' );
        foreach ( $encodings as $enc ) {
            $this->_testMbTrim( $enc, $fixture_full, $expected );
            $this->_testMbTrim( $enc, $fixture_mix, $expected );
        }
    }

    /**
     * Runs the component with App.encoding temporarily set to $enc and checks
     * that $fixture is trimmed down to $expected.
     *
     * @param string $enc encoding to test
     * @param string $fixture input, encoded in the current App.encoding
     * @param string $expected expected result, encoded in the current App.encoding
     * @return void
     */
    function _testMbTrim( $enc, $fixture, $expected ) {
        $org_enc = Configure::read( 'App.encoding' );
        Configure::write( 'App.encoding', $enc );
        $data = array( 'test' => $fixture );
        // Re-encode both sides into the encoding under test.
        mb_convert_variables( $enc, $org_enc, $expected );
        mb_convert_variables( $enc, $org_enc, $data );
        $this->controller->data = $data;
        $this->controller->DataPrep->initialize( $this->controller, null );
        $res = $this->controller->data;
        $this->assertEqual( $res['test'], $expected );
        // Restore the application encoding for the following tests.
        Configure::write( 'App.encoding', $org_enc );
    }
}
?>