001 /** 002 * ========================================= 003 * LibFormula : a free Java formula library 004 * ========================================= 005 * 006 * Project Info: http://reporting.pentaho.org/libformula/ 007 * 008 * (C) Copyright 2006-2007, by Pentaho Corporation and Contributors. 009 * 010 * This library is free software; you can redistribute it and/or modify it under the terms 011 * of the GNU Lesser General Public License as published by the Free Software Foundation; 012 * either version 2.1 of the License, or (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 015 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 016 * See the GNU Lesser General Public License for more details. 017 * 018 * You should have received a copy of the GNU Lesser General Public License along with this 019 * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, 020 * Boston, MA 02111-1307, USA. 021 * 022 * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 023 * in the United States and other countries.] 024 * 025 * 026 * ------------ 027 * $Id: URLEncoder.java 3521 2007-10-16 10:55:14Z tmorgner $ 028 * ------------ 029 * (C) Copyright 2006-2007, by Pentaho Corporation. 030 */ 031 032 package org.jfree.formula.util; 033 034 import java.io.UnsupportedEncodingException; 035 036 /** 037 * Creation-Date: Jan 22, 2007, 4:36:38 PM 038 * 039 * @author Thomas Morgner 040 */ 041 public class URLEncoder 042 { 043 private URLEncoder() 044 { 045 } 046 047 private static final String[] hex = { 048 "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", 049 "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", 050 "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", 051 "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", 052 "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", 053 "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F", 054 "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37", 055 "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", 056 "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47", 057 "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F", 058 "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57", 059 "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F", 060 "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67", 061 "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F", 062 "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77", 063 "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F", 064 "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", 065 "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", 066 "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", 067 "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", 068 "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", 069 "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", 070 "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", 071 "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", 072 "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", 073 "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", 074 "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", 075 "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", 076 "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", 077 "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", 078 "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", 079 "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" 080 }; 081 082 /** 083 * Encode a string according to RFC 1738. 084 * <p/> 085 * <quote> "...Only alphanumerics [0-9a-zA-Z], the special characters "$-_.+!*'()," [not 086 * including the quotes - ed], and reserved characters used for their reserved purposes 087 * may be used unencoded within a URL."</quote> 088 * <p/> 089 * <ul> <li><p>The ASCII characters 'a' through 'z', 'A' through 'Z', and '0' through 090 * '9' remain the same. 091 * <p/> 092 * <li><p>The unreserved characters - _ . ! ~ * ' ( ) remain the same. 093 * <p/> 094 * <li><p>All other ASCII characters are converted into the 3-character string "%xy", 095 * where xy is the two-digit hexadecimal representation of the character code 096 * <p/> 097 * <li><p>All non-ASCII characters are encoded in two steps: first to a sequence of 2 or 098 * 3 bytes, using the UTF-8 algorithm; secondly each of these bytes is encoded as "%xx". 099 * </ul> 100 * 101 * @param s The string to be encoded 102 * @return The encoded string 103 */ 104 public static String encodeUTF8 (final String s) 105 { 106 final StringBuffer sbuf = new StringBuffer(); 107 final char[] sChars = s.toCharArray(); 108 final int len = sChars.length; 109 for (int i = 0; i < len; i++) 110 { 111 final int ch = sChars[i]; 112 if ('A' <= ch && ch <= 'Z') 113 { // 'A'..'Z' 114 sbuf.append((char) ch); 115 } 116 else if ('a' <= ch && ch <= 'z') 117 { // 'a'..'z' 118 sbuf.append((char) ch); 119 } 120 else if ('0' <= ch && ch <= '9') 121 { // '0'..'9' 122 sbuf.append((char) ch); 123 } 124 else if (ch == '-' || ch == '_' // unreserved 125 || ch == '.' || ch == '!' 126 || ch == '~' || ch == '*' 127 || ch == '\'' || ch == '(' 128 || ch == ')') 129 { 130 sbuf.append((char) ch); 131 } 132 else if (ch <= 0x007f) 133 { // other ASCII 134 sbuf.append(hex[ch]); 135 } 136 else if (ch <= 0x07FF) 137 { // non-ASCII <= 0x7FF 138 sbuf.append(hex[0xc0 | (ch >> 6)]); 139 sbuf.append(hex[0x80 | (ch & 0x3F)]); 140 } 141 else 142 { // 0x7FF < ch <= 0xFFFF 143 sbuf.append(hex[0xe0 | (ch >> 12)]); 144 sbuf.append(hex[0x80 | ((ch >> 6) & 0x3F)]); 145 sbuf.append(hex[0x80 | (ch & 0x3F)]); 146 } 147 } 148 return sbuf.toString(); 149 } 150 151 private static String encodeBytes (final byte[] s) 152 { 153 final StringBuffer sbuf = new StringBuffer(); 154 final int len = s.length; 155 for (int i = 0; i < len; i++) 156 { 157 final int ch = (s[i] & 0xff); 158 if ('A' <= ch && ch <= 'Z') 159 { // 'A'..'Z' 160 sbuf.append((char) ch); 161 } 162 else if ('a' <= ch && ch <= 'z') 163 { // 'a'..'z' 164 sbuf.append((char) ch); 165 } 166 else if ('0' <= ch && ch <= '9') 167 { // '0'..'9' 168 sbuf.append((char) ch); 169 } 170 else if (ch == '-' || ch == '_' // unreserved 171 || ch == '.' || ch == '!' 172 || ch == '~' || ch == '*' 173 || ch == '\'' || ch == '(' 174 || ch == ')') 175 { 176 sbuf.append((char) ch); 177 } 178 else 179 { // other ASCII 180 sbuf.append(hex[ch]); 181 } 182 } 183 return sbuf.toString(); 184 } 185 186 public static String encode (final String s, final String encoding) 187 throws UnsupportedEncodingException 188 { 189 if ("utf-8".equalsIgnoreCase(encoding)) 190 { 191 return encodeUTF8(s); 192 } 193 194 return encodeBytes(s.getBytes(encoding)); 195 } 196 197 }