|
| 1 | +package com.yetanalytics.xapi.validation; |
| 2 | + |
| 3 | +import java.util.regex.Pattern; |
| 4 | + |
/**
 * Precompiled regular expressions used for validation.
 *
 * <p>Currently exposes {@link #LANGUAGE_TAG}, which matches RFC 5646 language
 * tags (with the language-subtag limitation described below).
 */
public class Regex {
    /* Language Tag Regex */
    /* RFC 5646, w/ lang subtag limitation */

    // Language subtags
    // NOTE: We exclude 4-8 character subtags, even though they are allowed in
    // the RFC spec, since they are reserved for future (not current) use.
    private static final String LANG_TAG = "(?:[A-Za-z]{2,3})";
    private static final String LANG_EXT = "(?:-[A-Za-z]{3})?";

    // Other subtags
    private static final String SCRIPT = "(?:-[A-Za-z]{4})?";
    private static final String REGION = "(?:-(?:[A-Za-z]{2}|\\d{3}))?";
    private static final String VARIANT = "(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*";
    // The singleton class deliberately skips 'x'/'X', which introduces private use.
    private static final String EXTENSION = "(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*";

    // Private use: "x" followed by one or more 1-8 character alphanumeric subtags.
    // Shared by the optional langtag suffix and the standalone private-use tag.
    private static final String PRIVATE_USE = "x(?:-[A-Za-z0-9]{1,8})+";
    private static final String PRIVATE = "(?:-" + PRIVATE_USE + ")?";
    private static final String PRIVATE_ONLY = "(?:" + PRIVATE_USE + ")";

    // Grandfathered tags
    private static final String GRANDFATHERED_IRREGULAR =
        "(?:"
        + "en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|"
        + "i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|"
        + "i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE"
        + ")";
    private static final String GRANDFATHERED_REGULAR =
        "(?:"
        + "art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|"
        + "zh-hakka|zh-min|zh-min-nan|zh-xiang"
        + ")";

    // Tag
    // RFC 5646: Language-Tag = langtag / privateuse / grandfathered.
    // The whole alternation is anchored, so only full-string matches succeed.
    private static final String LANGUAGE_TAG_STRING =
        "^(?:"
        + "(?:" + LANG_TAG + LANG_EXT + SCRIPT + REGION + VARIANT + EXTENSION + PRIVATE + ")"
        + "|" + PRIVATE_ONLY
        + "|" + GRANDFATHERED_IRREGULAR
        + "|" + GRANDFATHERED_REGULAR
        + ")$";

    /* IRI Regex */
    /* RFC 3987, with differences noted below */
    // NOTE(review): no IRI pattern is defined in this class yet.

    /* Regular Expressions */

    /**
     * Case-insensitive pattern for an RFC 5646 language tag.
     *
     * <p>Accepts regular language tags (language, optional extended language,
     * script, region, variants, extensions and private-use suffix), standalone
     * private-use tags (e.g. {@code x-private}) and the grandfathered tags.
     * Primary language subtags of 4-8 characters are intentionally rejected.
     * The pattern is anchored at both ends.
     */
    public static final Pattern LANGUAGE_TAG =
        Pattern.compile(LANGUAGE_TAG_STRING, Pattern.CASE_INSENSITIVE);

}
0 commit comments