Having trouble, please come in and take a look: why does my MapReduce TF-IDF job get stuck?

Time:10-29

Source code is attached below. Can anyone tell me why the job gets stuck at map 0% reduce 0%?
I am running a pseudo-distributed installation.
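
For context, job3 is the step that turns the earlier outputs into the final weights. Assuming the standard unsmoothed definition (the course code may use a variant), with D the total number of documents (the "count" line in part-r-00003) and df(t) the number of documents that contain term t (from part-r-00000):

\[ \mathrm{tfidf}(t,d) \;=\; \mathrm{tf}(t,d) \times \log\frac{D}{\mathrm{df}(t)} \]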

package cn.itcast.mapreduce3;

import cn.itcast.mapreduce2.JobMain2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.net.URI;

public class JobMain3 extends Configured implements Tool {
    public int run(String[] strings) throws Exception {
        Job job3 = Job.getInstance(super.getConf(), "job3");
        System.setProperty("HADOOP_USER_NAME", "root");
        job3.addCacheFile(new URI("hdfs://localhost:9000/TFIDF/part-r-00000"));
        job3.addCacheFile(new URI("hdfs://localhost:9000/TFIDF/part-r-00003"));
        job3.setMapperClass(TFIDF_mapper3.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job3, new Path("file:///D:\\hdfsdata\\TFIDF_result1"));
        job3.setReducerClass(TFIDF_reudce3.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job3, new Path("file:///D:\\hdfsdata\\TFIDF_result3"));
        boolean b = job3.waitForCompletion(true);
        return b ? 1 : 0;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int run = ToolRunner.run(configuration, new JobMain3(), args);
        System.exit(run);
    }
}
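
Separate from the hang, one small thing stands out in the driver: run() returns 1 on success, and ToolRunner.run() hands that value straight to System.exit(), so the shell would see a non-zero (failure) status for a successful run. The usual Tool convention is the opposite; a minimal sketch of that convention (the class name ExitCodeSketch is just for illustration, not from the original code):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Minimal Tool skeleton showing only the usual exit-code convention.
public class ExitCodeSketch extends Configured implements Tool {
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(super.getConf(), "sketch");
        // ... job setup elided ...
        boolean completed = job.waitForCompletion(true);
        return completed ? 0 : 1; // 0 signals success to the shell
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner passes run()'s return value through; System.exit reports it
        System.exit(ToolRunner.run(new Configuration(), new ExitCodeSketch(), args));
    }
}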
package cn.itcast.mapreduce3;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.text.NumberFormat;
import java.util.HashMap;
import java.util.Map;

public class TFIDF_mapper3 extends Mapper<LongWritable, Text, Text, Text> {
    // stores the total number of weibo posts, D
    private static Map<String, Integer> cmap = null;
    // stores df: in how many texts a given word appears
    private static Map<String, Integer> df = null;

    protected void setup(Context context) throws IOException, InterruptedException {
        if (cmap == null || cmap.size() == 0 || df == null || df.size() == 0) {
            URI[] cacheFiles = context.getCacheFiles(); // part-r-00000, part-r-00003
            if (cacheFiles != null) {
                // there are two cache files, so loop over them to find the matching one
                for (int i = 0; i < cacheFiles.length; i++) {
                    URI uri = cacheFiles[i];
                    if (uri.getPath().endsWith("part-r-00003")) {
                        // Path path = new Path(uri.getPath()); // the path to part-r-00003
                        FileSystem fileSystem = FileSystem.get(cacheFiles[i], context.getConfiguration());
                        FSDataInputStream dataInputStream = fileSystem.open(new Path(cacheFiles[i]));
                        // read the contents at this path: wrap the byte stream in a buffered character stream
                        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream));
                        String readLine = bufferedReader.readLine();
                        if (readLine.startsWith("count")) {
                            String[] split = readLine.split("\t"); // only one line of data here, so an if is enough
                            cmap = new HashMap<>();
                            cmap.put(split[0], Integer.parseInt(split[1].trim()));
                        }
                        bufferedReader.close();
                        fileSystem.close();
                    }
                    else if (uri.getPath().endsWith("part-r-00000")) {
                        FileSystem fileSystem = FileSystem.get(cacheFiles[i], context.getConfiguration());
                        FSDataInputStream dataInputStream = fileSystem.open(new Path(cacheFiles[i]));
                        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream));
                        String readLine = bufferedReader.readLine();
                        // NOTE: readLine is never advanced inside this loop, so it can never terminate
                        while (readLine != null) {
                            String[] split = readLine.split("\t");
                            df = new HashMap<>(); // NOTE: re-created on every pass, so at most one entry survives
                            df.put(split[0], Integer.parseInt(split[1].trim()));
                        }
                        bufferedReader.close();
                        fileSystem.close();
                    }
                }
            }
        }
    }

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // the map phase reads part-r-00000, part-r-00001, part-r-00002 ... so use getInputSplit to find which file this split came from
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        String name = fileSplit.getPath().getName();
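
For reference, the standard line-reading idiom for the kind of cache-file loading done in setup() calls readLine() on every loop pass, so the loop reaches end-of-file and terminates, and it creates the map once before the loop. A self-contained sketch, assuming the same tab-separated "word count" format and cache-file path as in the post (the class and method names are illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

public class DfReaderSketch {
    // Load "word<TAB>count" lines from one HDFS file into a map.
    public static Map<String, Integer> readDf(Configuration conf, Path path) throws Exception {
        Map<String, Integer> df = new HashMap<>(); // created once, before the loop
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        try (FSDataInputStream in = fs.open(path);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
            String line;
            // readLine() runs on every iteration, so the loop reaches EOF and stops
            while ((line = reader.readLine()) != null) {
                String[] split = line.split("\t");
                df.put(split[0], Integer.parseInt(split[1].trim()));
            }
        }
        return df;
    }

    public static void main(String[] args) throws Exception {
        // path matches the cache file used in the post; adjust as needed
        Map<String, Integer> df = readDf(new Configuration(),
                new Path("hdfs://localhost:9000/TFIDF/part-r-00000"));
        System.out.println(df.size() + " terms loaded");
    }
}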