Hi,
I am trying to train a spaCy model for NER. I have a dataset with 2940 rows, and I trained a base model (call it current_model) on that data. I then received another 10 separate datasets, each with 200 to 530 rows, so I loaded my current_model with spacy.load("current_model") and trained it on each dataset in turn. When I predict NER on test data, the model recognizes the entities from the newest dataset, but it seems to forget the entities from the older datasets. I did it this way to cut down training time. See my code below for what I tried.

Code for training the base model

import spacy
from spacy.util import minibatch, compounding
from spacy.gold import GoldParse  # needed for GoldParse below (spaCy 2.x)
import random
import time  # needed for the timing calls below
from pathlib import Path
from spacy import displacy
import re
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
from cytoolz import partition_all
import os
from os import path
import shutil
import json


df = pd.read_csv("new_annotations/dataset_transfer_learning1.csv")

def populate_train_data(df):
    train_data = []
    i = 0
    for d_index, row in df.iterrows():
        print(row["annotations"])
        content = row["annotations"].replace("\\n", "\n").replace("\n", " ")
        content = re.sub(r"(?<=[:])(?=[^\s])", r" ", content)

        # Find tags and entities and store the values in an entity list
        soup = BeautifulSoup(content, "html.parser")
        text = soup.get_text()
        entities = []
        for tag in soup.find_all():
            if tag.string is None:
                # failing silently for an invalid tag
                print(f'Tagging is invalid: {row["_id"], tag.name}, on row {i+2}, skipping..')
                continue

            tag_index = content.split(str(tag))[0].count(tag.string)
            try:
                for index, match in enumerate(re.finditer(tag.string.replace("*", " "), text)):
                    if index == tag_index:
                        entities.append((match.start(), match.end(), tag.name))
            except Exception as e:
                print(e, f"at line no {i+2}")
                continue

        i += 1
        if entities:
            train_data.append((text, {"entities": entities}))
    return train_data


def train(training_data, old_training_data=None, model_name=None):
    nlp = None
    pretrained_weights = Path('weights/model999.bin')
    if model_name is not None:
        nlp = spacy.load(model_name, weights=pretrained_weights)
    else:
        print("no model specified, using default model")
        nlp = spacy.load("en_core_web_sm")

    if "ner" not in nlp.pipe_names:
        print("there is no ner, creating ner")
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)
    else:
        print("there is ner")
        ner = nlp.get_pipe("ner")

    for _, annotations in training_data:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])

    start_time = time.time()
    if model_name is not None:
        # nlp.resume_training()
        # TRAINING_DATA = populate_train_data(pd.read_csv(old_training_data))
        TRAINING_DATA = old_training_data
        revision_data = []
        for doc in nlp.pipe(list(zip(*TRAINING_DATA))[0]):
            tags = [w.tag_ for w in doc]
            heads = [w.head.i for w in doc]
            deps = [w.dep_ for w in doc]
            entities = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
            revision_data.append((doc, GoldParse(doc, entities=entities)))

        fine_tune_data = []
        for raw_text, entity_offsets in training_data:
            doc = nlp.make_doc(raw_text)
            try:
                gold = GoldParse(doc, entities=entity_offsets['entities'])
            except ValueError:
                # skip examples whose entity spans do not align with tokens
                continue
            fine_tune_data.append((doc, gold))

        other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
        optimizer = nlp.entity.create_optimizer()
        with nlp.disable_pipes(*other_pipes):
            # pretrained_weights = Path('weights/model999.bin')
            # with pretrained_weights.open("rb") as file_:
            #     ner.model.tok2vec.from_bytes(file_.read())
            for i in range(20):
                example_data = revision_data + fine_tune_data
                # example_data = training_data
                losses = {}
                random.shuffle(example_data)
                for batch in partition_all(2, example_data):
                    docs, golds = zip(*batch)
                    # print(docs, golds)
                    try:
                        nlp.update(docs, golds)
                    except ValueError:
                        pass
                # print(losses)
    else:
        for i in range(20):
            random.shuffle(training_data)
            correct = 1
            for text, annotations in training_data:
                try:
                    nlp.update([text], [annotations])
                    print(correct)
                    correct += 1
                except ValueError:
                    pass
                    # print("skipping..")
            no_of_stars = i
            print("*" * no_of_stars)

    end_time = time.time()
    print("this code took {}".format(end_time - start_time))
    return nlp


def save_to_directory(nlp, directory_name):
    save_directory = directory_name
    for directory in save_directory:
        if directory is not None:
            directory_full_path = Path(directory + "_" + datetime.today().strftime('%Y_%m_%d'))
            if path.exists(directory_full_path):
                shutil.rmtree(directory_full_path)
                print("folder already existed so removed")
            if not directory_full_path.exists():
                directory_full_path.mkdir()
            nlp.to_disk(directory_full_path)
            print("Saved model to output directory", directory)


if __name__ == "__main__":
    training_data = populate_train_data(df)
    # training_data = [
    #     ("I Like Today and Evening", {"entities": [(7, 12, "DAY"), (17, 24, "DAY")]}),
    #     ("Today is my lucky day", {"entities": [(1, 5, "DAY")]}),
    #     ("Yesterday and Today are two same days of a month", {"entities": [(14, 19, "DAY")]}),
    #     ("May Today is Best Day", {"entities": [(4, 9, "DAY")]}),
    #     ("Have a Nice Today and Every Day", {"entities": [(12, 17, "DAY")]}),
    #     ("Hey How are feeling Today", {"entities": [(20, 25, "DAY")]}),
    # ]
    # print(training_data)
    nlp = train(training_data)
    save_to_directory(nlp, ["trained_model_with_transfer_learning"])

# TODO: train using batches
# TODO: add drop rate



Code for training with the new dataset and saving it to a different directory

Note: the code below lives in a separate file.


import spacy
from spacy import displacy
import pandas as pd
from annotations_training_spacy_31_oct_2019 import populate_train_data, train, save_to_directory


# test_texts = "I Like Today and Evening"
# base_training_data = [
#     ("I Like Today and Evening", {"entities": [(7, 12, "DAY"), (17, 24, "DAY")]}),
#     ("Today is my lucky day", {"entities": [(1, 5, "DAY")]}),
#     ("Yesterday and Today are two same days of a month", {"entities": [(14, 19, "DAY")]}),
#     ("May Today is Best Day", {"entities": [(4, 9, "DAY")]}),
#     ("Have a Nice Today and Every Day", {"entities": [(12, 17, "DAY")]}),
#     ("Hey How are feeling Today", {"entities": [(20, 25, "DAY")]}),
# ]

test_text = test_texts

# new_data_set = [
#     ("Today is an Awsome Day", {"entities": [(1, 5, "DAY")]}),
# ]

nlp = train(training_data=new_data_set,
            old_training_data=base_training_data,
            model_name="trained_model_with_transfer_learning_8_2019_12_05")
save_to_directory(nlp, ["trained_model_with_transfer_learning_9"])

# the sentencizer must be in the pipeline before the doc is created,
# otherwise doc.sents fails because no sentence boundaries are set
nlp.add_pipe(nlp.create_pipe('sentencizer'))

doc = nlp(test_text)
print("ENTITIES in '%s'" % test_text)
sentence = list(doc.sents)
for ent in doc.ents:
    print(ent.label_, ent.text)

displacy.serve(sentence, style='ent')


As you can see, I also tried feeding the old datasets' tags back in, but I still have the problem.

I know some people have run into this before; if anyone has solved it, please help.

Thanks in advance for your help, friends.


shahid khan, 5 December 2019, 14:05

1 answer

Best answer

Are you training a new model, or adding to an existing spaCy model? If you do the latter, the network's learned weights and features get overwritten and misaligned, and accuracy on the old entities drops. I say this from experience, from when I wanted to train Korean and Japanese names that spaCy could not identify. You can also try FastText, Flair, and Polyglot and see whether they achieve your goal. Try combining all of these tools and you should get good results; that is the solution I ended up using.
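To illustrate the alternative to updating on the new rows alone, here is a minimal sketch of the usual rehearsal approach in spaCy 2.x: mix a sample of the previously annotated rows into every pass over the new rows, resume from the existing optimizer state, and update with dropout. The model directory name is taken from the question; the two example rows, the batch size, and the dropout rate are hypothetical placeholders, not a definitive recipe.

import random
import spacy
from spacy.util import minibatch

# Hypothetical stand-ins: in practice, old_data would be a sample of the
# original 2940 annotated rows and new_data the incoming incremental dataset.
old_data = [("Today is my lucky day", {"entities": [(0, 5, "DAY")]})]
new_data = [("Today is an Awesome Day", {"entities": [(0, 5, "DAY")]})]

nlp = spacy.load("trained_model_with_transfer_learning_8_2019_12_05")
ner = nlp.get_pipe("ner")
for _, annotations in old_data + new_data:
    for start, end, label in annotations["entities"]:
        ner.add_label(label)

other_pipes = [p for p in nlp.pipe_names if p != "ner"]
with nlp.disable_pipes(*other_pipes):
    optimizer = nlp.resume_training()   # keep the existing weights
    for itn in range(20):
        examples = old_data + new_data  # rehearsal: old rows in every pass
        random.shuffle(examples)
        losses = {}
        for batch in minibatch(examples, size=8):
            texts, annots = zip(*batch)
            nlp.update(texts, annots, sgd=optimizer, drop=0.35, losses=losses)
        print(itn, losses)

The same pool-and-rehearse idea scales to the ten incremental datasets: keep a growing pool of all rows annotated so far (or a few hundred sampled from it), so that no single dataset is ever the only signal the NER weights see between saves.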

Syenix, 5 December 2019, 11:27