001 package org.maltparser.core.feature.map;
002
003 import java.util.regex.Pattern;
004 import java.util.regex.PatternSyntaxException;
005
006 import org.maltparser.core.exception.MaltChainedException;
007 import org.maltparser.core.feature.FeatureException;
008 import org.maltparser.core.feature.function.FeatureFunction;
009 import org.maltparser.core.feature.function.FeatureMapFunction;
010 import org.maltparser.core.feature.value.FeatureValue;
011 import org.maltparser.core.feature.value.FunctionValue;
012 import org.maltparser.core.feature.value.MultipleFeatureValue;
013 import org.maltparser.core.feature.value.SingleFeatureValue;
014 import org.maltparser.core.symbol.SymbolTable;
015 import org.maltparser.core.symbol.SymbolTableHandler;
016
017 /**
018 *
019 *
020 * @author Johan Hall
021 */
022 public class SplitFeature implements FeatureMapFunction {
023 protected FeatureFunction parentFeature;
024 protected MultipleFeatureValue multipleFeatureValue;
025 protected SymbolTableHandler tableHandler;
026 protected SymbolTable table;
027 protected String separators;
028 protected Pattern separatorsPattern;
029
030 public SplitFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
031 super();
032 setTableHandler(tableHandler);
033 multipleFeatureValue = new MultipleFeatureValue(this);
034 }
035
036 public void initialize(Object[] arguments) throws MaltChainedException {
037 if (arguments.length != 2) {
038 throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. ");
039 }
040 if (!(arguments[0] instanceof FeatureFunction)) {
041 throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. ");
042 }
043 if (!(arguments[1] instanceof String)) {
044 throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. ");
045 }
046 setParentFeature((FeatureFunction)arguments[0]);
047 setSeparators((String)arguments[1]);
048 setSymbolTable(tableHandler.addSymbolTable("SPLIT_"+parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable()));
049 }
050
051 public Class<?>[] getParameterTypes() {
052 Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, java.lang.String.class };
053 return paramTypes;
054 }
055
056 public FeatureValue getFeatureValue() {
057 return multipleFeatureValue;
058 }
059
060 public String getSymbol(int code) throws MaltChainedException {
061 return table.getSymbolCodeToString(code);
062 }
063
064 public int getCode(String symbol) throws MaltChainedException {
065 return table.getSymbolStringToCode(symbol);
066 }
067
068 public void update() throws MaltChainedException {
069 multipleFeatureValue.reset();
070 parentFeature.update();
071 FunctionValue value = parentFeature.getFeatureValue();
072 if (value instanceof SingleFeatureValue) {
073 String symbol = ((SingleFeatureValue)value).getSymbol();
074 if (((FeatureValue)value).isNullValue()) {
075 multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol, true);
076 multipleFeatureValue.setNullValue(true);
077 } else {
078 String items[];
079 try {
080 items = separatorsPattern.split(symbol);
081 } catch (PatternSyntaxException e) {
082 throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'",e);
083 }
084 for (int i = 0; i < items.length; i++) {
085 if (items[i].length() > 0) {
086 multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i], table.getKnown(items[i]));
087 }
088 }
089 multipleFeatureValue.setNullValue(false);
090 }
091 } else if (value instanceof MultipleFeatureValue) {
092 if (((MultipleFeatureValue)value).isNullValue()) {
093 multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(((MultipleFeatureValue)value).getFirstSymbol()), ((MultipleFeatureValue)value).getFirstSymbol(), true);
094 multipleFeatureValue.setNullValue(true);
095 } else {
096 for (String symbol : ((MultipleFeatureValue)value).getSymbols()) {
097 String items[];
098 try {
099 items = separatorsPattern.split(symbol);
100 } catch (PatternSyntaxException e) {
101 throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'", e);
102 }
103 for (int i = 0; i < items.length; i++) {
104 multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i], table.getKnown(items[i]));
105 }
106 multipleFeatureValue.setNullValue(false);
107 }
108 }
109 }
110 }
111
112 public void updateCardinality() throws MaltChainedException {
113 parentFeature.updateCardinality();
114 multipleFeatureValue.setCardinality(table.getValueCounter());
115 }
116
117 public boolean equals(Object obj) {
118 if (this == obj)
119 return true;
120 if (obj == null)
121 return false;
122 if (getClass() != obj.getClass())
123 return false;
124 return obj.toString().equals(this.toString());
125 }
126
127 public FeatureFunction getParentFeature() {
128 return parentFeature;
129 }
130
131 public void setParentFeature(FeatureFunction parentFeature) {
132 this.parentFeature = parentFeature;
133 }
134
135 public String getSeparators() {
136 return separators;
137 }
138
139 public void setSeparators(String separators) {
140 this.separators = separators;
141 separatorsPattern = Pattern.compile(separators);
142 }
143
144 public SymbolTable getSymbolTable() {
145 return table;
146 }
147
148 public void setSymbolTable(SymbolTable table) {
149 this.table = table;
150 }
151
152 public SymbolTableHandler getTableHandler() {
153 return tableHandler;
154 }
155
156 public void setTableHandler(SymbolTableHandler tableHandler) {
157 this.tableHandler = tableHandler;
158 }
159
160
161
162 public String toString() {
163 final StringBuilder sb = new StringBuilder();
164 sb.append("Split(");
165 sb.append(parentFeature.toString());
166 sb.append(", ");
167 sb.append(separators);
168 sb.append(')');
169 return sb.toString();
170 }
171 }
172