"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Tokenizer = exports.Charset = void 0;
const separators_1 = require("./edi/separators");
/**
 * A named character repertoire compiled into the regular expressions used to
 * consume data of a given representation.
 *
 * Instances expose:
 *  - `name`          the identifier passed to the constructor
 *  - `alpha`         zero or more characters of the admissible alphabet
 *  - `alphanumeric`  like `alpha`, additionally accepting the digits 0-9
 *  - `numeric`       an optional leading minus followed by digits
 *  - `decimal`       digits only
 *
 * All four expressions carry the global flag, so they are stateful through
 * `lastIndex`.
 */
class Charset {
    /**
     * @param {string} name identifier of the character set
     * @param {object} configuration must provide `delimiters()`; its result is
     *     queried with `.includes(codePoint)`, so it is presumably an array of
     *     numeric code points (confirm against the configuration class)
     * @param {string[][]} admissibleAlphabet entries of one string (a single
     *     character) or two strings (an inclusive range of characters)
     * @param {boolean} [unicode=false] compile the expressions with the `u` flag
     *     and accept any Unicode decimal digit in numeric data
     */
    constructor(name, configuration, admissibleAlphabet, unicode = false) {
        this.name = name;
        const exclude = configuration.delimiters();
        const [alpha, alphanumeric] = this.compile(admissibleAlphabet, exclude, unicode);
        this.alpha = alpha;
        this.alphanumeric = alphanumeric;
        if (unicode) {
            // `\p{Nd}` is only a Unicode property escape when the `u` flag is set.
            // Without it the class would match the literal characters p, {, N, d, }.
            this.numeric = /[-]?[\p{Nd}]*/gu;
            this.decimal = /[\p{Nd}]*/gu;
        }
        else {
            this.numeric = /[-]?[0-9]*/g;
            this.decimal = /[0-9]*/g;
        }
    }
    /**
     * Builds the alpha and alphanumeric expressions for an alphabet.
     *
     * Every admissible code point that is not listed in `excludes` is escaped
     * through `Separators.escapeIfNeeded` and added to one character class.
     *
     * @param {string[][]} admissibleAlphabet single characters or inclusive ranges
     * @param {{includes(codePoint: number): boolean}} excludes code points that
     *     must never be part of the character class (the configured delimiters)
     * @param {boolean} [unicode=false] add the `u` flag to the expressions
     * @returns {RegExp[]} `[alpha, alphanumeric]`
     */
    compile(admissibleAlphabet, excludes, unicode = false) {
        const flag = unicode ? "gu" : "g";
        let output = "";
        // Shared by ranges and single entries so that a delimiter is filtered out
        // no matter how the alphabet table happens to list it.
        const append = (codePoint) => {
            if (!excludes.includes(codePoint)) {
                output += separators_1.Separators.escapeIfNeeded(String.fromCodePoint(codePoint));
            }
        };
        for (const seq of admissibleAlphabet) {
            if (seq.length > 1) {
                const start = seq[0].codePointAt(0);
                const end = seq[1].codePointAt(0);
                // A missing (empty string) or NUL boundary skips the whole range.
                if (start && end) {
                    for (let i = start; i <= end; i++) {
                        append(i);
                    }
                }
            }
            else {
                const idx = seq[0].codePointAt(0);
                if (idx) {
                    append(idx);
                }
            }
        }
        return [
            new RegExp("[" + output + "]*", flag),
            new RegExp("[0-9" + output + "]*", flag)
        ];
    }
}
exports.Charset = Charset;
/** Character set "UNOA": space, ( ), the range , to /, = and upper-case A-Z. */
class UNOA extends Charset {
    constructor(config) {
        super("UNOA", config, UNOA.charset);
    }
}
UNOA.charset = [
    ["\u0020"],
    ["\u0028", "\u0029"],
    ["\u002C", "\u002F"],
    ["\u003D"],
    ["\u0041", "\u005A"]
];
/** Character set "UNOB": adds lower-case a-z and further punctuation to the basic Latin letters. */
class UNOB extends Charset {
    constructor(config) {
        super("UNOB", config, UNOB.charset);
    }
}
UNOB.charset = [
    ["\u0020", "\u0022"],
    ["\u0041", "\u005A"],
    ["\u0025", "\u002F"],
    ["\u003A", "\u003F"],
    ["\u0061", "\u007A"]
];
/** Character set "UNOC": U+0020-U+007E without the digits, plus U+00A0-U+00FF. */
class UNOC extends Charset {
    constructor(config) {
        super("UNOC", config, UNOC.charset);
    }
}
UNOC.charset = [
    ["\u0020", "\u002F"],
    ["\u003A", "\u007E"],
    ["\u00A0", "\u00FF"]
];
/** Character set "UNOD"; its alphabet table is assigned to `UNOD.charset` after the class. */
class UNOD extends Charset {
    constructor(config) {
        super("UNOD", config, UNOD.charset);
    }
}
// Alphabet of UNOD. The upper half appears to follow the ISO 8859-2 layout,
// one row of sixteen positions per line below.
UNOD.charset = [
    ["\u0020", "\u002F"], ["\u003A", "\u007E"],
    // The ninth entry used to be "\u0048" ('H'), which the range above already
    // covers; the ISO 8859-2 table has the diaeresis U+00A8 in that position.
    ["\u00A0"], ["\u0104"], ["\u02D8"], ["\u0141"], ["\u00A4"], ["\u013D"], ["\u015A"], ["\u00A7"],
    ["\u00A8"], ["\u0160"], ["\u015E"], ["\u0164"], ["\u0179"], ["\u00AD"], ["\u017D"], ["\u017B"],
    ["\u00B0"], ["\u0105"], ["\u02DB"], ["\u0142"], ["\u00B4"], ["\u013E"], ["\u015B"], ["\u02C7"],
    ["\u00B8"], ["\u0161"], ["\u015F"], ["\u0165"], ["\u017A"], ["\u02DD"], ["\u017E"], ["\u017C"],
    ["\u0154"], ["\u00C1"], ["\u00C2"], ["\u0102"], ["\u00C4"], ["\u0139"], ["\u0106"], ["\u00C7"],
    ["\u010C"], ["\u00C9"], ["\u0118"], ["\u00CB"], ["\u011A"], ["\u00CD"], ["\u00CE"], ["\u010E"],
    ["\u0110"], ["\u0143"], ["\u0147"], ["\u00D3"], ["\u00D4"], ["\u0150"], ["\u00D6"], ["\u00D7"],
    ["\u0158"], ["\u016E"], ["\u00DA"], ["\u0170"], ["\u00DC"], ["\u00DD"], ["\u0162"], ["\u00DF"],
    ["\u0155"], ["\u00E1"], ["\u00E2"], ["\u0103"], ["\u00E4"], ["\u013A"], ["\u0107"], ["\u00E7"],
    ["\u010D"], ["\u00E9"], ["\u0119"], ["\u00EB"], ["\u011B"], ["\u00ED"], ["\u00EE"], ["\u010F"],
    ["\u0111"], ["\u0144"], ["\u0148"], ["\u00F3"], ["\u00F4"], ["\u0151"], ["\u00F6"], ["\u00F7"],
    ["\u0159"], ["\u016F"], ["\u00FA"], ["\u0171"], ["\u00FC"], ["\u00FD"], ["\u0163"], ["\u02D9"]
];
/** Character set "UNOE": printable ASCII without digits plus Cyrillic letters and a few symbols. */
class UNOE extends Charset {
    constructor(config) {
        super("UNOE", config, UNOE.charset);
    }
}
UNOE.charset = [
    ["\u0020", "\u002F"], ["\u003A", "\u007E"],
    ["\u00A0"], ["\u0401", "\u040C"], ["\u00AD"], ["\u040E", "\u044F"],
    ["\u2116"], ["\u0451", "\u045C"], ["\u00A7"], ["\u045E"], ["\u045F"]
];
/** Character set "UNOF": printable ASCII without digits plus Greek letters and a few symbols. */
class UNOF extends Charset {
    constructor(config) {
        super("UNOF", config, UNOF.charset);
    }
}
UNOF.charset = [
    ["\u0020", "\u002F"], ["\u003A", "\u007E"],
    ["\u00A0"], ["\u2018", "\u2019"], ["\u00A3"], ["\u20AC"], ["\u20AF"], ["\u00A6", "\u00A9"],
    ["\u037A"], ["\u00AB", "\u00AD"], ["\u2015"], ["\u00B0", "\u00B3"], ["\u0384", "\u0386"],
    ["\u00B7"], ["\u0388", "\u038A"], ["\u00BB"], ["\u038C", "\u03A1"], ["\u03A3", "\u03CE"]
];
/** Character set "UNOG": printable ASCII without digits plus the extended Latin letters listed below. */
class UNOG extends Charset {
    constructor(config) {
        super("UNOG", config, UNOG.charset);
    }
}
UNOG.charset = [
    ["\u0020", "\u002F"], ["\u003A", "\u007E"],
    ["\u00A0"], ["\u0126"], ["\u02D8"], ["\u00A3"], ["\u00A4"], ["\u0124"], ["\u00A7"], ["\u00A8"],
    ["\u0130"], ["\u015E"], ["\u011E"], ["\u0134"], ["\u00AD"], ["\u017B"],
    ["\u00B0"], ["\u0127"], ["\u00B2"], ["\u00B3", "\u00B5"], ["\u0125"], ["\u00B7"], ["\u00B8"],
    ["\u0131"], ["\u015F"], ["\u011F"], ["\u0135"], ["\u00BD"], ["\u017C"],
    ["\u00C0", "\u00C2"], ["\u00C4"], ["\u010A"], ["\u0108"], ["\u00C7", "\u00CF"],
    ["\u00D1", "\u00D4"], ["\u0120"], ["\u00D6"], ["\u00D7"], ["\u011C"], ["\u00D9", "\u00DC"],
    ["\u016C"], ["\u015C"],
    ["\u00DF", "\u00E2"], ["\u00E4"], ["\u010B"], ["\u0109"], ["\u00E7", "\u00EF"],
    ["\u00F1", "\u00F4"], ["\u0121"], ["\u00F6"], ["\u00F7"], ["\u011D"], ["\u00F9", "\u00FC"],
    ["\u016D"], ["\u015D"], ["\u02D9"]
];
/** Character set "UNOH": printable ASCII without digits plus the extended Latin letters listed below. */
class UNOH extends Charset {
    constructor(config) {
        super("UNOH", config, UNOH.charset);
    }
}
// The upper half appears to follow the ISO 8859-4 layout. Two entries near the
// end used to repeat the capitals U+0128 and U+0168 that are already listed
// earlier; the ISO 8859-4 table has the lower-case letters U+012B and U+016B
// in those positions, which were therefore missing from the alphabet.
UNOH.charset = [
    ["\u0020", "\u002F"], ["\u003A", "\u007E"],
    ["\u00A0"], ["\u0104"], ["\u0138"], ["\u0156"], ["\u00A4"], ["\u0128"], ["\u013B"], ["\u00A7"],
    ["\u00A8"], ["\u0160"], ["\u0112"], ["\u0122"], ["\u0166"], ["\u00AD"], ["\u017D"], ["\u00AF"],
    ["\u00B0"], ["\u0105"], ["\u02DB"], ["\u0157"], ["\u00B4"], ["\u0129"], ["\u013C"], ["\u02C7"],
    ["\u00B8"], ["\u0161"], ["\u0113"], ["\u0123"], ["\u0167"], ["\u014A"], ["\u017E"], ["\u014B"],
    ["\u0100"], ["\u00C1", "\u00C6"], ["\u012E"], ["\u010C"], ["\u00C9"], ["\u0118"], ["\u00CB"],
    ["\u0116"], ["\u00CD"], ["\u00CE"], ["\u012A"],
    ["\u0110"], ["\u0145"], ["\u014C"], ["\u0136"], ["\u00D4", "\u00D8"], ["\u0172"],
    ["\u00DA", "\u00DC"], ["\u0168"], ["\u016A"], ["\u00DF"],
    ["\u0101"], ["\u00E1", "\u00E6"], ["\u012F"], ["\u010D"], ["\u00E9"], ["\u0119"], ["\u00EB"],
    ["\u0117"], ["\u00ED"], ["\u00EE"], ["\u012B"],
    ["\u0111"], ["\u0146"], ["\u014D"], ["\u0137"], ["\u00F4", "\u00F8"], ["\u0173"],
    ["\u00FA", "\u00FC"], ["\u0169"], ["\u016B"], ["\u02D9"]
];
/** Character set "UNOI": printable ASCII without digits plus Arabic letters and a few symbols. */
class UNOI extends Charset {
    constructor(config) {
        super("UNOI", config, UNOI.charset);
    }
}
UNOI.charset = [
    ["\u0020", "\u002F"], ["\u003A", "\u007E"],
    ["\u00A0"], ["\u00A4"], ["\u060C"], ["\u00AD"], ["\u061B"], ["\u061F"],
    ["\u0621", "\u063A"], ["\u0640", "\u0652"]
];
/** Character set "UNOJ": printable ASCII without digits plus Hebrew letters and a few symbols. */
class UNOJ extends Charset {
    constructor(config) {
        super("UNOJ", config, UNOJ.charset);
    }
}
UNOJ.charset = [
    ["\u0020", "\u002F"], ["\u003A", "\u007E"],
    ["\u00A0"], ["\u00A2", "\u00A9"], ["\u00D7"], ["\u00AB", "\u00B9"], ["\u00F7"],
    ["\u00BB", "\u00BE"], ["\u2017"], ["\u05D0", "\u05EA"], ["\u200E"], ["\u200F"]
];
/** Character set "UNOK": printable ASCII without digits plus the extended Latin letters listed below. */
class UNOK extends Charset {
    constructor(config) {
        super("UNOK", config, UNOK.charset);
    }
}
UNOK.charset = [
    ["\u0020", "\u002F"], ["\u003A", "\u007E"],
    ["\u00A0", "\u00CF"], ["\u011E"], ["\u00D1", "\u00DC"], ["\u0130"], ["\u015E"],
    ["\u00DF", "\u00EF"], ["\u011F"], ["\u00F1", "\u00FC"], ["\u0131"], ["\u015F"], ["\u00FF"]
];
// Maps the value of `config.charset` to the Charset subclass implementing it.
const CHARSET_CLASSES = new Map([
    ["UNOA", UNOA],
    ["UNOB", UNOB],
    ["UNOC", UNOC],
    ["UNOD", UNOD],
    ["UNOE", UNOE],
    ["UNOF", UNOF],
    ["UNOG", UNOG],
    ["UNOH", UNOH],
    ["UNOI", UNOI],
    ["UNOJ", UNOJ],
    ["UNOK", UNOK]
]);
/**
 * Accumulates characters of one token into a buffer, consuming input with the
 * regular expression that matches the currently selected data representation
 * (alpha, alphanumeric, numeric or decimal).
 */
class Tokenizer {
    /**
     * @param {object} config configuration providing `charset` (one of "UNOA" to
     *     "UNOK") and whatever the selected Charset reads from it
     * @throws {Error} when `config.charset` names an unsupported character set
     */
    constructor(config) {
        this.errors = {
            secondDecimalMark() {
                return new Error("Cannot accept a second decimal mark while parsing a number");
            }
        };
        this.regexes = this.setCharsetBasedOnConfig(config);
        // Alphanumeric is the mode a fresh tokenizer starts in.
        this.regex = this.regexes.alphanumeric;
        this.buffer = "";
    }
    /** Switches to alpha mode. */
    alpha() {
        this.regex = this.regexes.alpha;
    }
    /** Switches to alphanumeric mode. */
    alphanumeric() {
        this.regex = this.regexes.alphanumeric;
    }
    /** Switches to numeric mode. */
    numeric() {
        this.regex = this.regexes.numeric;
    }
    /**
     * Handles a decimal mark found at `chunk[index]`.
     *
     * In numeric mode the mark is stored as "." and the tokenizer moves on to
     * decimal mode. In alpha or alphanumeric mode the original character is
     * stored unchanged.
     *
     * @param {string} chunk input being tokenized
     * @param {number} index position of the decimal mark in `chunk`
     * @throws {Error} when the tokenizer is already in decimal mode, i.e. a
     *     decimal mark was seen before in the same number
     */
    decimal(chunk, index) {
        const current = this.regex;
        let result = ".";
        if (current === this.regexes.numeric) {
            this.regex = this.regexes.decimal;
        }
        else if (current === this.regexes.alpha || current === this.regexes.alphanumeric) {
            result = chunk.charAt(index);
        }
        else if (current === this.regexes.decimal) {
            throw this.errors.secondDecimalMark();
        }
        this.buffer += result;
    }
    /**
     * Instantiates the Charset named by `config.charset`, stores it in
     * `this.regexes` and returns it.
     *
     * @param {object} config configuration with a `charset` property
     * @returns {Charset} the selected character set
     * @throws {Error} when the character set is not one of "UNOA" to "UNOK"
     */
    setCharsetBasedOnConfig(config) {
        const CharsetClass = CHARSET_CLASSES.get(config.charset);
        if (CharsetClass === undefined) {
            throw new Error(`Unsupported charset encoding '${config.charset}'`);
        }
        this.regexes = new CharsetClass(config);
        return this.regexes;
    }
    /**
     * Appends the run of upper-case letters A-Z (char codes 65 to 90) starting
     * at `index` to the buffer.
     *
     * @param {string} chunk input being tokenized
     * @param {number} index position to start reading from
     * @returns {number} index of the first character that was not consumed
     */
    segment(chunk, index) {
        const start = index;
        let code;
        // charCodeAt yields NaN past the end of the chunk; `|| 0` turns that into
        // a value outside the accepted interval so the loop terminates.
        while ((code = chunk.charCodeAt(index) || 0) < 91 && code > 64) {
            index++;
        }
        this.buffer += chunk.slice(start, index);
        return index;
    }
    /**
     * Appends the characters matched by the current mode's expression, starting
     * at `index`, to the buffer.
     *
     * @param {string} chunk input being tokenized
     * @param {number} index position to start matching from
     * @returns {number} the expression's `lastIndex` after matching
     */
    data(chunk, index) {
        // The expressions are global, so matching starts at `lastIndex`, and they
        // can match the empty string, in which case nothing is consumed.
        this.regex.lastIndex = index;
        this.regex.test(chunk);
        const end = this.regex.lastIndex;
        this.buffer += chunk.slice(index, end);
        return end;
    }
    /**
     * Appends the single character at `chunk[index]` to the buffer regardless of
     * the current mode.
     *
     * @param {string} chunk input being tokenized
     * @param {number} index position of the character to take over
     */
    release(chunk, index) {
        this.buffer += chunk.charAt(index);
    }
    /**
     * @returns {number} length of the buffer, reduced by one while in decimal
     *     mode (presumably so that the decimal mark does not count)
     */
    length() {
        const markLength = this.regex === this.regexes.decimal ? 1 : 0;
        return this.buffer.length - markLength;
    }
    /** @returns {string} everything accumulated in the buffer so far */
    content() {
        return this.buffer;
    }
}
exports.Tokenizer = Tokenizer;
//# sourceMappingURL=tokenizer.js.map