Novice for help: the small white, can you tell me the use simple doc2vec text study, there is an err-CodePudding

Novice for help: the small white, can you tell me the use simple doc2vec text study, how to solve the mistakes?

 
# coding: utf-8 

The import jieba 

The import gensim 

The from gensim. Models. Doc2vec import doc2vec 

TaggededDocument=gensim. Models. Doc2vec. TaggedDocument 


Def get_datasest () : 
With the open (" F: \ CSDN \ shuju TXT ", 'r', encoding='GBK', errors='ignore') as the cf: # # for the training set, read from a file, the content inside is a line of words 

Docs=cf. Readlines () 

X_train=[] 
CNT=0 
For I, the text enumerate in (docs) : 

CNT=CNT + 1 

Word_list="'. Join (jieba. The cut (text. The split (' \ n ') [0])). The split (' ') 

L=len (word_list) 

=word_list word_list [l - 1] [l - 1]. Strip () 

The document=TaggededDocument (word_list, tags=[I]) 

X_train. Append (document) 

Return x_train 


Def "train" (x_train vector_size=100, epoch_num=1) : 



Model_dm=Doc2Vec (x_train min_count=1, the window=5, vector_size=vector_size, sample=1-3, e negative=5, workers=4) 

Model_dm. "train" (x_train, total_examples=model_dm corpus_count, epochs=70) 

Model_dm. Save (' picking. The model ') # # model to save the location of the 

Return model_dm 


Def ceshi () : 
Model_dm=Doc2Vec. Load (F: \ "CSDN \ picking model") 



Str1='computer operating systems' 



Test_text="'. Join (jieba. The cut (str1)). The encode (' GBK '). The split (' ') 

Inferred_vector_dm=model_dm. Infer_vector (test_text) # # of text vector 

Print (inferred_vector_dm) 

Return inferred_vector_dm 


If __name__=="__main__ ': 
X_train=get_datasest () 

Model_dm="train" (x_train) 

Doc_2_vec=ceshi () 

Print the 
Type (doc_2_vec) 

Print (doc_2_vec. Shape)