Home > Software design >  JAVA, How to count the percentage of letters used in a sentence and guess the language
JAVA, How to count the percentage of letters used in a sentence and guess the language

Time:04-12

I'm trying to calculate the percentage of each letter used in a sentence and then use language analysis to figure out what language it is.

I am able to count the letters, but it's not printing the exact output I want. I also don't know how to use the language analysis part.

I found some code on another stack page that has helped a bit, but how do I get it to print out the letters as follows:

T 1/19 - 5%
h 1/19 - 5%
e 1/19 - 5%
r 2/19 - 10%
o 3/19 - 15%
u 1/19 - 5%
n 2/19 - 10%
d 1/19 - 5%
b 1/19 - 5%
w 1/19 - 5%
f 1/19 - 5%
x 1/19 - 5%

Output right now is this:

b occurs 1 times
d occurs 1 times
e occurs 1 times
f occurs 1 times
h occurs 1 times
n occurs 2 times
o occurs 3 times
r occurs 2 times
t occurs 1 times
u occurs 1 times
w occurs 1 times
x occurs 1 times
Total number of letters is 16

'Grace' 'Hopper' 'f�dd' 'Murray' 'f�dd' 'den' 'december' 'i' 'New' 'York' 'd�d' 'den' 'januari' 'i' 'Arlington' 'Virginia' 'var' 'en' 'amerikansk' 'datorpionj�r' 'och' 'sj�officer' 'flottiljamiral' 
 ---- Loop enum:
EN
Reading file assets/lang-samples/EN.txt
SV
Reading file assets/lang-samples/SV.txt
FI
Reading file assets/lang-samples/FI.txt

This is my code and the code is from another stack page that works in a sense...

import java.util.HashMap;

public class Main {

    public static void main(String[] args) {

        String sentence = "\"The round brown fox.";
        int[] letterCount = new int[2100];
        if (sentence.contains(".")) {
            // toLowerCase() the sentence since we count upper and lowercase as the same
            for (char letter : sentence.toLowerCase().toCharArray()) {
                if (Character.isLetter(letter)) {
                    letterCount[letter - 'a']  ;
                }
            }

            // Display the count of each letter that was found
            int sumOfLetters = 0;
            for (int i = 0; i < letterCount.length; i  ) {
                int count = letterCount[i];
                if (count > 0) {
                    System.out.println((char)(i   'a')   " occurs "   count   " times");
                    sumOfLetters  = count;
                }
            }

            System.out.println("Total number of letters is "   sumOfLetters);
        } else {
            System.out.println("You forgot a period.  Try again.");
        }

        String text = "Grace Hopper (född Murray), född den 9 december 1906 i New York, död den 1 januari 1992 i Arlington, Virginia, var en amerikansk dator-pionjär och sjöofficer (flottiljamiral).";


        text = text.replaceAll("[^\\p{L}\\s]", "");

        text = text.replaceAll("\\s ", " ");

        String[] words = text.split(" ");

        for (String word: words) {
            System.out.print("'"   word   "' ");
        }

        HashMap<LangLabel,Language> languages = new HashMap<>();
        System.out.println("\n ---- Loop enum:");
        for (LangLabel label: LangLabel.values()) {
            System.out.println(label);

            String content = Utils.readTextFile("assets/lang-samples/"   label   ".txt");

            languages.put(label, new Language(content, label));
        }

    }
}

/**
 * Language labels, each carrying the name of the language written in Swedish.
 */
public enum LangLabel {

    DE("tyska"),
    EN("engelska"),
    ES("spanska"),
    FI("finska"),
    FR("franska"),
    IT("italienska"),
    NO("norska"),
    SV("svenska");

    /** Name of the language, set once by the constant's constructor call. */
    private final String swedishName;

    // Enum constructor: invoked by each constant declaration above.
    LangLabel(String swedishName) {
        this.swedishName = swedishName;
    }

    /**
     * Returns the name passed to the constant's constructor,
     * e.g. {@code LangLabel.SV.getName()} returns {@code "svenska"}.
     *
     * @return the language name associated with this label
     */
    public String getName() {
        return swedishName;
    }
}
import java.util.HashMap;

public class Language {
    String content;
    LangLabel label;

    HashMap<String, Integer> charCount = new HashMap<String,Integer>();
    HashMap<String, Double> charDistribution = new HashMap<String,Double>();

    public Language(String content, LangLabel label) {
        this.content = content;
        this.label = label;

        this.content = this.content.replaceAll("[^\\p{L}]", "").toLowerCase();

        calcCharDistribution();
    }

    private void calcCharDistribution() {

        for (int i = 0; i < content.length(); i  ) {
            //System.out.println(content.charAt(i));
            // Konvertera char till String:
            String letter = Character.toString(content.charAt(i));

            if (!charCount.containsKey(letter)) {
                charCount.put(letter, 0);
            }

            charCount.put(letter, charCount.get(letter) 1);

        }

        for (String letter: charCount.keySet()) {

            // Hur stor andel av hela texten är innevarande bokstav (*100 om vi vill ha det i procent)
            Double distr = (double) charCount.getOrDefault(letter, 0) / content.length() * 100;

            charDistribution.put(letter, distr);

            System.out.println(letter   " count: "   charCount.get(letter)   " distribution: "   distr);

        }

    }


}
import java.io.*;
import java.nio.charset.StandardCharsets;

/**
 * File helpers: persisting objects with Java serialization and reading or
 * writing UTF-8 text files. Failures are reported on the console instead of
 * being thrown to the caller.
 */
public class Utils {
    /**
     * Reads one serialized object from a file.
     *
     * <p>NOTE(review): this uses native Java deserialization, which is unsafe
     * on files from untrusted sources; only load files this program wrote.
     *
     * @param fileName path of the file to read
     * @return the deserialized object, or {@code null} if the file is missing,
     *         was written by an incompatible class version, or cannot be read
     */
    static Object loadObject(String fileName) {
        // try-with-resources closes both streams even when readObject() throws
        try (FileInputStream inStream = new FileInputStream(fileName);
             ObjectInputStream objIn = new ObjectInputStream(inStream)) {

            return objIn.readObject();

        } catch (FileNotFoundException e) {
            System.out.println("Ingen savefile hittades!");
        } catch (InvalidClassException e) {
            System.out.println("Savefilen är föråldrad, skapar nytt objekt.");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            System.out.println("Class not found!");
            e.printStackTrace();
        }

        return null;
    }

    /**
     * Serializes an object to a file, replacing any existing content.
     *
     * @param fileName     path of the file to write
     * @param objectToSave object to serialize
     */
    static void saveObject(String fileName, Object objectToSave) {
        System.out.println("Saving object to file " + fileName);
        try (FileOutputStream outStream = new FileOutputStream(fileName);
             ObjectOutputStream objOutStream = new ObjectOutputStream(outStream)) {

            objOutStream.writeObject(objectToSave);

        } catch (IOException e) {
            // Also covers FileNotFoundException, which is a subclass of IOException
            e.printStackTrace();
        }
    }


    /**
     * Reads a UTF-8 text file into a single string.
     *
     * <p>Lines are appended back to back, so line terminators are not part of
     * the result.
     *
     * @param fileName path of the file to read
     * @return the concatenated lines; whatever was read before a failure
     *         (possibly the empty string) if the file is missing or unreadable
     */
    static String readTextFile(String fileName) {
        System.out.println("Reading file " + fileName);
        StringBuilder retStr = new StringBuilder();

        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8))) {

            String line;

            while ((line = bufferedReader.readLine()) != null) {
                retStr.append(line);
            }

        } catch (IOException e) {
            // Also covers FileNotFoundException, which is a subclass of IOException
            e.printStackTrace();
        }

        return retStr.toString();

    }

    /**
     * Writes a string to a file as UTF-8, replacing any existing content.
     *
     * @param fileName     path of the file to write
     * @param fileContents text to write
     */
    static void writeTextFile(String fileName, String fileContents) {
        System.out.println("Writing file " + fileName);

        try (BufferedWriter bufferedWriter = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8))) {

            bufferedWriter.write(fileContents);

        } catch (FileNotFoundException e) {
            System.out.println("File not found");
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

}

CodePudding user response:

I wrote some code and got the following result.

T 1/19 - 5.26%
h 1/19 - 5.26%
e 1/19 - 5.26%
r 2/19 - 10.53%
o 3/19 - 15.79%
u 1/19 - 5.26%
n 2/19 - 10.53%
d 1/19 - 5.26%
b 1/19 - 5.26%
w 1/19 - 5.26%
f 1/19 - 5.26%
x 1/19 - 5.26%

I used a List to maintain the order of the characters. If the order is not important, you can use a Map.

Here's the complete runnable code. I made the additional class an inner class so I could post this code as one block.

import java.util.ArrayList;
import java.util.List;

/**
 * Counts how often each character occurs in a sentence, keeping the
 * characters in order of first appearance, and prints each count as a
 * fraction and percentage of the sentence length.
 */
public class LetterFrequency {

    public static void main(String[] args) {
        LetterFrequency lf = new LetterFrequency();
        String sentence = "The round brown fox.";
        sentence = sentence.replace(".", "");
        List<LetterCount> letterCounts = lf.calculateLetterCounts(sentence);

        for (LetterCount letterCount : letterCounts) {
            System.out.print(letterCount.getLetter() + " ");
            System.out.print(letterCount.getCount() + "/");
            // The denominator is the full sentence length, spaces included
            System.out.print(sentence.length() + " - ");
            // 100.0 forces floating-point division
            double percentage = 100.0 * letterCount.getCount() / sentence.length();
            System.out.println(String.format("%,.2f", percentage) + "%");
        }
    }

    /**
     * Counts every character of the sentence except the space character.
     * Upper and lower case are counted separately.
     *
     * @param sentence text to analyse
     * @return one entry per distinct character, in order of first appearance
     */
    public List<LetterCount> calculateLetterCounts(String sentence) {
        List<LetterCount> letterCounts = new ArrayList<>();
        char[] letters = sentence.toCharArray();
        for (char c : letters) {
            if (c != ' ') {
                addCount(letterCounts, c);
            }
        }

        return letterCounts;
    }

    /** Increments the entry for {@code c}, or appends a new entry with count 1. */
    private void addCount(List<LetterCount> letterCounts, char c) {
        for (LetterCount letterCount : letterCounts) {
            if (letterCount.getLetter() == c) {
                letterCount.incrementCount();
                return;
            }
        }

        letterCounts.add(new LetterCount(c));
    }

    /** A character together with the number of times it has been seen. */
    public class LetterCount {

        private final char letter;

        private int count;

        /** Creates an entry for a character seen for the first time (count 1). */
        public LetterCount(char letter) {
            this.letter = letter;
            this.count = 1;
        }

        public void incrementCount() {
            this.count++;
        }

        public char getLetter() {
            return letter;
        }

        public int getCount() {
            return count;
        }

    }

}

CodePudding user response:

Change the code when you count each letter in Main for this:

// Display each letter's count as a fraction and percentage of all counted letters
        int sumOfLetters = IntStream.of(letterCount).sum();
        System.out.println(sumOfLetters);
        for (int i = 0; i < letterCount.length; i++) {
            int count = letterCount[i];
            if (count > 0) {
                // Divide by the actual letter total, not a hard-coded constant.
                // Integer division truncates, so 1 of 16 prints as 6%.
                int percentage = count * 100 / sumOfLetters;
                System.out.println((char) (i + 'a') + " " + count + "/" + sumOfLetters + " - " + percentage + "%");
            }
        }

It counts the total number of letters before the loop and also changes the println() call.

  • Related