I'm trying to build an NER model using Bert-base-NER for a tweets dataset, but I end up getting the error shown below. Please help.
This is what I have done:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
# Load the tokenizer and the token-classification model from the same
# "dslim/bert-base-NER" checkpoint, then wrap both in a "ner" pipeline.
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
# NOTE: `nlp` is a transformers pipeline here, not a spaCy language object.
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
# ---------
# Intended to return a (text, label) pair for every entity found in the string v.
def all_ents(v):
# NOTE: this is the line behind the AttributeError quoted below: nlp(v) is a
# plain list here, so it has no `.ents` attribute.
return [(ent.text, ent.label_) for ent in nlp(v).ents]
# Apply all_ents to each value of df['text'] and store the result in df1['Entities'].
# NOTE(review): both `df` and `df1` are used but neither is defined in this
# snippet; presumably they are created earlier. Confirm they are the intended frames.
df1['Entities'] = df['text'].apply(lambda v: all_ents(v))
df1.head()
AttributeError: 'list' object has no attribute 'ents'
Thank you for the help
CodePudding user response:
It seems you are mixing code from different libraries.
The attribute .ents exists on the documents produced by spacy, but not on the output of a transformers pipeline.
#import spacy
import en_core_web_sm
# Load the small English spaCy model; the object it returns for a text
# exposes the recognised entities through its `.ents` attribute.
nlp = en_core_web_sm.load()
doc = nlp('Hello World of Python. Have a nice day')
entity_pairs = [(span.text, span.label_) for span in doc.ents]
print(entity_pairs)
In transformers you should iterate directly over the result of nlp(v).
It returns a list of dictionaries, and each dictionary ent has the keys ent["entity"], ent["score"], ent["index"], ent["word"], ent["start"], ent["end"]
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
# Tokenizer and model must come from the same checkpoint, so name it once.
checkpoint = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTokenClassification.from_pretrained(checkpoint)
# Token-classification ("ner") pipeline built from the two objects above.
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
# ---------
import pandas as pd
# One-row sample frame with a single 'text' column to run the NER pipeline on.
sample_texts = ['Hello World of Python. Have a nice day']
df = pd.DataFrame({'text': sample_texts})
# ---------
def all_ents(v):
    """Return the ``(word, entity)`` pair of every token tagged in *v*.

    The ``nlp`` pipeline returns a list of dictionaries (one per tagged
    token), so each result is read by key rather than through spaCy-style
    attributes such as ``.ents`` or ``.label_``.
    """
    return [(ent['word'], ent['entity']) for ent in nlp(v)]
# Tag every row of the 'text' column and keep the pairs in a new column.
# (all_ents can be passed to apply directly; no lambda wrapper is needed.)
tagged = df['text'].apply(all_ents)
df['Entities'] = tagged
print(df['Entities'].head())