@@ -552,19 +552,88 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
552552 (* index )++ ;
553553}
554554
555+ /* Small combining runs are usually cheaper with insertion sort.
   A run of consecutive combining characters that needs reordering is
   sorted with insertion sort when it is shorter than this many code
   points; a run of this length or longer is sorted with counting sort
   using a temporary buffer. */
556+ #define CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD 20
557+
558+ static void
559+ canonical_ordering_sort_insertion (int kind , void * data ,
560+ Py_ssize_t start , Py_ssize_t end )
561+ {
562+ for (Py_ssize_t i = start + 1 ; i < end ; i ++ ) {
563+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
564+ unsigned char combining = _getrecord_ex (code )-> combining ;
565+ Py_ssize_t j = i ;
566+
567+ while (j > start ) {
568+ Py_UCS4 previous = PyUnicode_READ (kind , data , j - 1 );
569+ if (_getrecord_ex (previous )-> combining <= combining ) {
570+ break ;
571+ }
572+ PyUnicode_WRITE (kind , data , j , previous );
573+ j -- ;
574+ }
575+ if (j != i ) {
576+ PyUnicode_WRITE (kind , data , j , code );
577+ }
578+ }
579+ }
580+
581+ static void
582+ canonical_ordering_sort_counting (int kind , void * data ,
583+ Py_ssize_t start , Py_ssize_t end ,
584+ Py_UCS4 * sortbuf )
585+ {
586+ Py_ssize_t counts [256 ] = {0 };
587+ Py_ssize_t run_length = end - start ;
588+ Py_ssize_t total = 0 ;
589+ unsigned char min_combining = 255 ;
590+ unsigned char max_combining = 0 ;
591+
592+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
593+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
594+ unsigned char combining = _getrecord_ex (code )-> combining ;
595+ counts [combining ]++ ;
596+ if (combining < min_combining ) {
597+ min_combining = combining ;
598+ }
599+ if (combining > max_combining ) {
600+ max_combining = combining ;
601+ }
602+ }
603+
604+ for (Py_ssize_t i = min_combining ; i <= max_combining ; i ++ ) {
605+ Py_ssize_t count = counts [i ];
606+ counts [i ] = total ;
607+ total += count ;
608+ }
609+
610+ /* Reuse counts[] as the next output slot for each CCC. */
611+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
612+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
613+ unsigned char combining = _getrecord_ex (code )-> combining ;
614+ sortbuf [counts [combining ]++ ] = code ;
615+ }
616+ for (Py_ssize_t i = 0 ; i < run_length ; i ++ ) {
617+ PyUnicode_WRITE (kind , data , start + i , sortbuf [i ]);
618+ }
619+ }
620+
555621static PyObject *
556622nfd_nfkd (PyObject * self , PyObject * input , int k )
557623{
558624 PyObject * result ;
559625 Py_UCS4 * output ;
560626 Py_ssize_t i , o , osize ;
561- int kind ;
562- const void * data ;
627+ int input_kind , result_kind ;
628+ const void * input_data ;
629+ void * result_data ;
563630 /* Longest decomposition in Unicode 3.2: U+FDFA */
564631 Py_UCS4 stack [20 ];
565- Py_ssize_t space , isize ;
632+ Py_ssize_t space , isize , length ;
566633 int index , prefix , count , stackptr ;
567634 unsigned char prev , cur ;
635+ Py_UCS4 * sortbuf = NULL ;
636+ Py_ssize_t sortbuflen = 0 ;
568637
569638 stackptr = 0 ;
570639 isize = PyUnicode_GET_LENGTH (input );
@@ -584,11 +653,11 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
584653 return NULL ;
585654 }
586655 i = o = 0 ;
587- kind = PyUnicode_KIND (input );
588- data = PyUnicode_DATA (input );
656+ input_kind = PyUnicode_KIND (input );
657+ input_data = PyUnicode_DATA (input );
589658
590659 while (i < isize ) {
591- stack [stackptr ++ ] = PyUnicode_READ (kind , data , i ++ );
660+ stack [stackptr ++ ] = PyUnicode_READ (input_kind , input_data , i ++ );
592661 while (stackptr ) {
593662 Py_UCS4 code = stack [-- stackptr ];
594663 /* Hangul Decomposition adds three characters in
@@ -656,34 +725,64 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
656725 if (!result )
657726 return NULL ;
658727
659- kind = PyUnicode_KIND (result );
660- data = PyUnicode_DATA (result );
728+ result_kind = PyUnicode_KIND (result );
729+ result_data = PyUnicode_DATA (result );
730+ length = PyUnicode_GET_LENGTH (result );
661731
662- /* Sort canonically. */
732+ /* Sort each consecutive combining-character run canonically. */
663733 i = 0 ;
664- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
665- for (i ++ ; i < PyUnicode_GET_LENGTH (result ); i ++ ) {
666- cur = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
667- if (prev == 0 || cur == 0 || prev <= cur ) {
668- prev = cur ;
734+ while (i < length ) {
735+ Py_ssize_t run_length , run_start ;
736+ int needs_sort = 0 ;
737+
738+ prev = _getrecord_ex (
739+ PyUnicode_READ (result_kind , result_data , i ))-> combining ;
740+ if (prev == 0 ) {
741+ i ++ ;
669742 continue ;
670743 }
671- /* Non-canonical order. Need to switch *i with previous. */
672- o = i - 1 ;
673- while (1 ) {
674- Py_UCS4 tmp = PyUnicode_READ (kind , data , o + 1 );
675- PyUnicode_WRITE (kind , data , o + 1 ,
676- PyUnicode_READ (kind , data , o ));
677- PyUnicode_WRITE (kind , data , o , tmp );
678- o -- ;
679- if (o < 0 )
680- break ;
681- prev = _getrecord_ex (PyUnicode_READ (kind , data , o ))-> combining ;
682- if (prev == 0 || prev <= cur )
744+
745+ run_start = i ++ ;
746+ while (i < length ) {
747+ cur = _getrecord_ex (
748+ PyUnicode_READ (result_kind , result_data , i ))-> combining ;
749+ if (cur == 0 ) {
683750 break ;
751+ }
752+ if (prev > cur ) {
753+ needs_sort = 1 ;
754+ }
755+ prev = cur ;
756+ i ++ ;
757+ }
758+ if (!needs_sort ) {
759+ continue ;
760+ }
761+
762+ run_length = i - run_start ;
763+ if (run_length < CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD ) {
764+ canonical_ordering_sort_insertion (result_kind , result_data ,
765+ run_start , i );
766+ continue ;
684767 }
685- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
768+
769+ if (run_length > sortbuflen ) {
770+ Py_UCS4 * new_sortbuf = PyMem_Realloc (sortbuf ,
771+ run_length * sizeof (Py_UCS4 ));
772+ if (new_sortbuf == NULL ) {
773+ PyErr_NoMemory ();
774+ PyMem_Free (sortbuf );
775+ Py_DECREF (result );
776+ return NULL ;
777+ }
778+ sortbuf = new_sortbuf ;
779+ sortbuflen = run_length ;
780+ }
781+
782+ canonical_ordering_sort_counting (result_kind , result_data ,
783+ run_start , i , sortbuf );
686784 }
785+ PyMem_Free (sortbuf );
687786 return result ;
688787}
689788
0 commit comments