Package org.jcodings

Class MultiByteEncoding

    • Field Detail

      • EncLen

        protected final int[] EncLen
      • Trans

        protected final int[][] Trans
      • TransZero

        protected final int[] TransZero
    • Constructor Detail

      • MultiByteEncoding

        protected MultiByteEncoding​(String name,
                                    int minLength,
                                    int maxLength,
                                    int[] EncLen,
                                    int[][] Trans,
                                    short[] CTypeTable)
    • Method Detail

      • length

        public int length​(byte c)
        Description copied from class: Encoding
        Returns the character length given the character head: returns 1 for single-byte encodings, or performs a direct length-table lookup for multibyte ones.
        Specified by:
        length in class Encoding
        Parameters:
        c - Character head. Oniguruma equivalent: mbc_enc_len. To be deprecated very soon (use the length(byte[] bytes, int p, int end) version instead).
      • missing

        protected final int missing​(int n)
      • missing

        protected final int missing​(int b,
                                    int delta)
      • safeLengthForUptoFour

        protected final int safeLengthForUptoFour​(byte[] bytes,
                                                  int p,
                                                  int end)
      • lengthForTwoUptoFour

        protected final int lengthForTwoUptoFour​(byte[] bytes,
                                                 int p,
                                                 int end,
                                                 int b,
                                                 int s)
      • safeLengthForUptoThree

        protected final int safeLengthForUptoThree​(byte[] bytes,
                                                   int p,
                                                   int end)
      • safeLengthForUptoTwo

        protected final int safeLengthForUptoTwo​(byte[] bytes,
                                                 int p,
                                                 int end)
      • mbnMbcToCode

        protected final int mbnMbcToCode​(byte[] bytes,
                                         int p,
                                         int end)
      • caseMap

        public int caseMap​(IntHolder flagP,
                           byte[] bytes,
                           IntHolder pp,
                           int end,
                           byte[] to,
                           int toP,
                           int toEnd)
        Description copied from class: Encoding
        Oniguruma equivalent: case_map
        Specified by:
        caseMap in class Encoding
      • mbnMbcCaseFold

        protected final int mbnMbcCaseFold​(int flag,
                                           byte[] bytes,
                                           IntHolder pp,
                                           int end,
                                           byte[] lower)
      • mb2CodeToMbcLength

        protected final int mb2CodeToMbcLength​(int code)
      • mb4CodeToMbcLength

        protected final int mb4CodeToMbcLength​(int code)
      • mb2CodeToMbc

        protected final int mb2CodeToMbc​(int code,
                                         byte[] bytes,
                                         int p)
      • mb4CodeToMbc

        protected final int mb4CodeToMbc​(int code,
                                         byte[] bytes,
                                         int p)
      • mb2IsCodeCType

        protected final boolean mb2IsCodeCType​(int code,
                                               int ctype)
      • mb4IsCodeCType

        protected final boolean mb4IsCodeCType​(int code,
                                               int ctype)
      • strLength

        public int strLength​(byte[] bytes,
                             int p,
                             int end)
        Specified by:
        strLength in class Encoding
      • strCodeAt

        public int strCodeAt​(byte[] bytes,
                             int p,
                             int end,
                             int index)
        Specified by:
        strCodeAt in class Encoding
      • isCodeCTypeInternal

        protected final boolean isCodeCTypeInternal​(int code,
                                                    int ctype)
        ONIGENC_IS_XXXXXX_CODE_CTYPE
      • isNewLine

        public boolean isNewLine​(byte[] bytes,
                                 int p,
                                 int end)
        onigenc_is_mbc_newline_0x0a / used also by multibyte encodings
        Specified by:
        isNewLine in class Encoding
      • asciiMbcCaseFold

        protected final int asciiMbcCaseFold​(int flag,
                                             byte[] bytes,
                                             IntHolder pp,
                                             int end,
                                             byte[] lower)
      • mbcCaseFold

        public int mbcCaseFold​(int flag,
                               byte[] bytes,
                               IntHolder pp,
                               int end,
                               byte[] lower)
        onigenc_ascii_mbc_case_fold
        Specified by:
        mbcCaseFold in class Encoding
        Parameters:
        flag - case fold flag
        pp - an IntHolder that points at character head
        lower - a buffer into which the case-folded character is extracted. Oniguruma equivalent: mbc_case_fold
      • applyAllCaseFold

        public void applyAllCaseFold​(int flag,
                                     ApplyAllCaseFoldFunction fun,
                                     Object arg)
        onigenc_ascii_apply_all_case_fold / used also by multibyte encodings
        Specified by:
        applyAllCaseFold in class Encoding
        Parameters:
        flag - case fold flag
        fun - case folding functor (look at: ApplyCaseFold)
        arg - case folding functor argument (look at: ApplyCaseFoldArg). Oniguruma equivalent: apply_all_case_fold
      • asciiCaseFoldCodesByString

        protected final CaseFoldCodeItem[] asciiCaseFoldCodesByString​(int flag,
                                                                      byte[] bytes,
                                                                      int p,
                                                                      int end)
      • caseFoldCodesByString

        public CaseFoldCodeItem[] caseFoldCodesByString​(int flag,
                                                        byte[] bytes,
                                                        int p,
                                                        int end)
        onigenc_ascii_get_case_fold_codes_by_str / used also by multibyte encodings
        Specified by:
        caseFoldCodesByString in class Encoding
      • propertyNameToCType

        public int propertyNameToCType​(byte[] bytes,
                                       int p,
                                       int end)
        onigenc_minimum_property_name_to_ctype; notably overridden by Unicode encodings
        Specified by:
        propertyNameToCType in class Encoding