Java, 3416 байт, 62%
Это моё решение: я анализирую список заданных слов и нахожу 60 наиболее распространённых биграмм и триграмм для каждого языка. Затем я проверяю, какие из этих n-грамм встречаются в слове, и выбираю язык, у которого в слове нашлось больше всего n-грамм.
/**
 * Guesses the language of a word by counting which language's n-grams occur in it.
 *
 * <p>Each of the four rows of {@link #triGr} and {@link #biGr} holds the n-grams
 * of one language; row {@code i} corresponds to the language id {@code i + 1}
 * returned by {@link #guess(String)}. Matching is done with
 * {@link String#contains(CharSequence)}, so it is case-sensitive and every
 * n-gram counts at most once per word no matter how often it occurs.
 */
public class Classificator {
    /** Words shorter than this are not scored at all. */
    private static final int MIN_SCORED_LENGTH = 3;

    /** Language id returned for words too short to score (Hungarian, per the original author). */
    private static final int SHORT_WORD_LANGUAGE = 4;

    /**
     * Value returned when no n-gram of any language occurs in the word.
     * NOTE(review): 5 lies outside the 1..4 language ids; presumably it means
     * "unknown" -- confirm against the callers before changing it.
     */
    private static final int NO_MATCH = 5;

    /** Score contributed by each distinct trigram found in the word. */
    private static final int TRIGRAM_WEIGHT = 2;

    /** Score contributed by each distinct bigram found in the word. */
    private static final int BIGRAM_WEIGHT = 1;

    /** Trigram table, one row per language (row index + 1 = language id). */
    static final String[][] triGr = {
        {"ing","ion","ent","tio","ted","nce","ter","res","ati","con","ess","ate","pro","ain","est","ons","men","ect","red","rea","com","ere","ers","nte","ine","her","ble","ist","tin","for","per","der","ear","str","ght","pre","ver","int","nde","the","igh","ive","sta","ure","end","enc","ned","ste","dis","ous","all","and","anc","ant","oun","ten","tra","are","sed","cti"},
        {"sch","che","ver","gen","ten","cht","ich","ein","ste","ter","hen","nde","nge","ach","ere","ung","den","sse","ers","and","eit","ier","ren","sen","ges","ang","ben","rei","est","nen","nte","men","aus","der","ent","hei","her","lle","ern","ert","uch","ine","ehe","auf","lie","tte","ige","ing","hte","mme","end","wei","len","hre","rau","ite","bes","ken","cha","ebe"},
        {"ent","are","ato","nte","ett","ere","ion","chi","con","one","men","nti","gli","pre","ess","att","tto","par","per","sta","tra","zio","and","iam","end","ter","res","est","nto","tta","acc","sci","cia","ver","ndo","amo","ant","str","tro","ssi","pro","era","eri","nta","der","ate","ort","com","man","tor","rat","ell","ale","gio","ont","col","tti","ano","ore","ist"},
        {"sze","ere","meg","ett","gye","ele","ond","egy","enn","ott","tte","ete","unk","ban","tem","agy","zer","esz","tet","ara","nek","hal","dol","mon","art","ala","ato","szt","len","men","ben","kap","ent","min","ndo","eze","sza","isz","fog","kez","ind","ten","tam","nak","fel","ene","all","asz","gon","mar","zem","szo","tek","zet","elm","het","eve","ssz","hat","ell"}
    };

    /** Bigram table, laid out row-for-row like {@link #triGr}. */
    static final String[][] biGr = {
        {"in","ed","re","er","es","en","on","te","ng","st","nt","ti","ar","le","an","se","de","at","ea","co","ri","ce","or","io","al","is","it","ne","ra","ro","ou","ve","me","nd","el","li","he","ly","si","pr","ur","th","di","pe","la","ta","ss","ns","nc","ll","ec","tr","as","ai","ic","il","us","ch","un","ct"},
        {"en","er","ch","te","ge","ei","st","an","re","in","he","ie","be","sc","de","es","le","au","se","ne","el","ng","nd","un","ra","ar","nt","ve","ic","et","me","ri","li","ss","it","ht","ha","la","is","al","eh","ll","we","or","ke","fe","us","rt","ig","on","ma","ti","nn","ac","rs","at","eg","ta","ck","ol"},
        {"re","er","to","ar","en","te","ta","at","an","nt","ra","ri","co","on","ti","ia","or","io","in","st","tt","ca","es","ro","ci","di","li","no","ma","al","am","ne","me","le","sc","ve","sa","si","tr","nd","se","pa","ss","et","ic","na","pe","de","pr","ol","mo","do","so","it","la","ce","ie","is","mi","cc"},
        {"el","en","sz","te","et","er","an","me","ta","on","al","ar","ha","le","gy","eg","re","ze","em","ol","at","ek","es","tt","ke","ni","la","ra","ne","ve","nd","ak","ka","in","am","ad","ye","is","ok","ba","na","ma","ed","to","mi","do","om","be","se","ag","as","ez","ot","ko","or","cs","he","ll","nn","ny"}
    };

    /**
     * Scores the word against every language and returns the best-scoring one.
     *
     * @param word the word to classify; must not be {@code null}
     * @return {@code 4} for words shorter than three characters; otherwise the
     *         1-based id of the language with the highest positive score (the
     *         lowest id wins a tie), or {@code 5} when no n-gram matches at all
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public int guess(String word) {
        if (word.length() < MIN_SCORED_LENGTH) {
            // Most words below 3 characters on the list are Hungarian.
            return SHORT_WORD_LANGUAGE;
        }
        int bestLanguage = -1;
        int bestScore = 0;
        for (int lang = 0; lang < triGr.length; lang++) {
            int score = TRIGRAM_WEIGHT * countContained(word, triGr[lang])
                    + BIGRAM_WEIGHT * countContained(word, biGr[lang]);
            // Strict comparison: a zero score never wins and ties keep the earlier language.
            if (score > bestScore) {
                bestScore = score;
                bestLanguage = lang;
            }
        }
        return bestLanguage < 0 ? NO_MATCH : bestLanguage + 1;
    }

    /** Counts how many entries of {@code grams} occur somewhere in {@code word}. */
    private static int countContained(String word, String[] grams) {
        int hits = 0;
        for (String gram : grams) {
            if (word.contains(gram)) {
                hits++;
            }
        }
        return hits;
    }
}
А это мой тест:
/**
 * Command-line harness that measures how often {@link Classificator#guess(String)}
 * agrees with the labelled word list in {@code list.txt}.
 *
 * <p>Each non-blank line of the file is split on whitespace; the first token is
 * the word and the second is parsed as an integer language id. A word may
 * appear on several lines with different ids, and a guess counts as correct
 * when it equals any of them.
 */
public class Test {
    /** Maps each word to every language id listed for it in the input file. */
    Map<String, List<Integer>> words = new HashMap<String, List<Integer>>();

    /**
     * Checks a guess against the loaded word list.
     *
     * @param word the word that was classified
     * @param lang the guessed language id
     * @return {@code true} if {@code lang} is one of the ids recorded for
     *         {@code word}; {@code false} otherwise, including when the word
     *         was never loaded
     */
    boolean validate(String word, Integer lang) {
        List<Integer> langs = words.get(word);
        return langs != null && langs.contains(lang);
    }

    /**
     * Loads {@code list.txt}, classifies every distinct word and prints
     * {@code "<correct> <total> <percentage>"} to standard output.
     *
     * @param args ignored
     * @throws FileNotFoundException if {@code list.txt} cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        Classificator cl = new Classificator();
        Test test = new Test();
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader buf = new BufferedReader(new FileReader("list.txt"))) {
            buf.lines().forEach(test::process);
        } catch (FileNotFoundException e) {
            throw e;
        } catch (java.io.IOException e) {
            // Only close() can end up here; report it unchecked, as lines() does for read errors.
            throw new java.io.UncheckedIOException(e);
        }
        int correct = 0;
        int total = 0;
        for (String word : test.words.keySet()) {
            int lang = cl.guess(word);
            // A guess of 0 is skipped and not counted in either total.
            if (lang == 0) {
                continue;
            }
            total++;
            if (test.validate(word, lang)) {
                correct++;
            }
        }
        // With an empty word list the percentage is the float 0/0 and prints as NaN.
        System.out.println(correct + " " + total + " " + (correct * 100f / total));
    }

    /**
     * Parses one line of the word list and records its language id.
     *
     * @param x a raw input line of the form {@code "<word> <language id>"};
     *          blank lines are ignored
     * @throws IllegalArgumentException if a non-blank line has fewer than two
     *         tokens ({@link NumberFormatException}, a subclass, if the second
     *         token is not an integer)
     */
    private void process(String x) {
        // Trim first: leading whitespace would otherwise produce an empty first token.
        String line = x.trim();
        if (line.isEmpty()) {
            return;
        }
        String[] parts = line.split("\\s+");
        if (parts.length < 2) {
            throw new IllegalArgumentException("Expected \"<word> <language>\" but got: " + x);
        }
        words.computeIfAbsent(parts[0], key -> new ArrayList<Integer>())
                .add(Integer.parseInt(parts[1]));
    }
}