The data is stored in test.txt, and the program converts it to a List of vectors.
The problem is that there seems to be a limit on the size of the TXT file: a 10 MB data file runs fine, but the conversion of a 100 MB data file never seems to finish.
How can I change the code so that it can handle larger data files?
Please answer, thank you!
Public class Kmeans {
Public static ListFileToVector FileInputStream (fs) throws IOException {
BufferedReader br=new BufferedReader (new InputStreamReader (fs));
String readLine=null;
ListPoints=new ArrayList (a);
Double [] data=https://bbs.csdn.net/topics/new double [20].
While ((readLine=br. ReadLine ())!=null) {
String [] point=readLine. Split (" ");
For (int j=0; j<20; J++)
Data [j]=Double. ParseDouble (point) [j];
//double [] fr=data;
The Vector vec=new RandomAccessSparseVector (data. Length);
Vec. Assign (data);
Points. The add (vec);
}
Br. The close ();
Return points;
}
Public static void writePointsToFile (ListPoints,
The String fileName,
FileSystem fs,
The Configuration conf) throws IOException {
Path the Path=new Path (fileName);
//SequenceFile. Writer (FileSystem, the Configuration, the Path, the key getClass (), the value. The getClass ())
SequenceFile. Writer Writer=new SequenceFile. Writer (fs, the conf,
Path, LongWritable. Class, VectorWritable. Class);
Long recNum=0;
VectorWritable vec=new VectorWritable ();
For (Vector point: points) {
Vec. Set (point);
Writer. Append (new LongWritable (recNum++), vec);
}
Writer. The close ();
}
Public static void main (String args []) throws the Exception {
FileInputStream fis=new FileInputStream (" E: \ \ Users \ \ ZZS \ \ Desktop \ \ results \ \ new - data \ \ test. TXT ");
System. SetProperty (" HADOOP_USER_NAME ", "root");
Int k=4;
ListVectors=FileToVector (fis);
//ListVectors=getPoints (points);
The File testData=https://bbs.csdn.net/topics/new File (" clustering/testData ");
if (! TestData. The exists ()) {
TestData. The mkdir ();
}
TestData=https://bbs.csdn.net/topics/new File (" clustering/testData/points ");
if (! TestData. The exists ()) {
TestData. The mkdir ();
}
The Configuration conf=new Configuration ();
FileSystem fs=FileSystem. Get (conf);
//has resulted in a store can be mahout recognition vector file1 file
WritePointsToFile (vectors, "clustering/testdata/points/file1", fs, the conf);
Path Path=new Path (" clustering/testdata/clusters/part - 00000 ");
SequenceFile. Writer Writer=new SequenceFile. Writer (fs, the conf, path, the Text. The class, gathering. The class).
for (int i=0; i//int n=(int) (Math. The random () * points in length).
The Vector vec=vectors. The get (I);
Gathering cluster=new gathering (vec, I, new EuclideanDistanceMeasure ());//using Euclidean distance to measure the distance between two points
Writer. Append (new Text (cluster getIdentifier ()), cluster);
}
Writer. The close ();
KMeansDriver. Run (conf.
The new Path (" clustering/testdata/points "),//inputPath
The new Path (" clustering/testdata/clusters "),//ClusterPath
The new Path (" clustering/output "),//OutputPath
Convergence coefficient of 0.001,//convergenceDelta
10,//MaxIterations
True,//runCLustering
0,//clusterClassificationThreshold
True);//runSequential
SequenceFile. Reader Reader=new SequenceFile. Reader (fs,
New Path (" clustering/output/" + "clusteredPoints" + "/part - m - 0"), the conf);
The File f2=new File (" E: \ \ results \ \ hadoop \ \ kmeans ");
PrintStream ps=new PrintStream (f2 getPath ());
//PrintStream Console=System. Out;
System. SetOut (ps);
IntWritable key=new IntWritable ();
WeightedPropertyVectorWritable value=https://bbs.csdn.net/topics/new WeightedPropertyVectorWritable ();
//ClusterWritable value=https://bbs.csdn.net/topics/new ClusterWritable ();
While (reader. Next (key, value)) {
System. The out. Println (value. The toString () + "belongs to cluster" + key. The toString ());
}
Ps. The close ();
Reader. The close ();
}
}
]
CodePudding user response:
Could someone please take a look?