Cómo ayuda ML con la auditoría de calidad del servicio al cliente

¿Es posible analizar las solicitudes de los clientes e identificar las causas de las críticas negativas en poco tiempo y sin altos costos laborales? En este artículo queremos contarte cómo logramos resolver este problema con la ayuda de herramientas de ML.

En nuestro trabajo surgió la necesidad de evaluar la calidad del servicio al cliente. Teníamos que analizar las solicitudes de los clientes e identificar los motivos de las críticas negativas sobre el servicio posventa de productos de seguros.

. - .

, , . , , .

, , , :

def cl_text(text):
    """Normalize a raw request string for downstream text processing.

    The text is lower-cased, a fixed sequence of patterns is deleted
    (a ``crm...`` run up to the end of its line, colon-prefixed digit
    groups shaped like ``NN NN NNNNNN`` and ``NN.NN.NNNN``, and a
    space-colon-prefixed word), ASCII letters and punctuation are replaced
    by spaces, and finally all whitespace runs are collapsed.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string with single spaces between the remaining chunks
        and no leading or trailing whitespace.
    """
    # NOTE(review): several patterns below are byte-identical or start with
    # blank prefixes; the label text they were meant to match appears to be
    # missing from this copy - confirm against the original source. They are
    # kept as-is, and in the same order, because repeated passes are not
    # guaranteed to be idempotent.
    removal_patterns = (
        r'crm[^\n]+',
        r':\s*\d{2}\s?\d{2}\s?\d{6}\s*',
        r':\s*\d{2}\s?\d{2}\s?\d{6}\s*',
        r' (  )?:\s*\d{2}\.?\d{2}\.?\d{4}\s*',
        r'  :\s*\d{2}\.?\d{2}\.?\d{4}\s*',
        r'  :\s*\d{2}\.?\d{2}\.?\d{4}\s*',
        r' :\s*\d{2}\.?\d{2}\.?\d{4}\s*',
        r' :[\S\W]\w*',
    )

    cleaned = text.lower()
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)

    # Raw string: the same pattern as before, without relying on the invalid
    # "\]" escape of a normal string literal (a SyntaxWarning on newer Pythons).
    cleaned = re.sub(
        r"""[A-Za-z!#$%&'()*+,./:;<=>?@[\]^_`{|}~—"\-]+""", ' ', cleaned
    )

    # Collapse whitespace last: the substitution above inserts spaces, so
    # collapsing earlier would leave runs of spaces in the result.
    return re.sub(r'\s+', ' ', cleaned).strip()

, , . - NLTK stopwords:

import pymorphy2
import nltk
# Shared morphological analyzer used to reduce tokens to their normal form.
morph = pymorphy2.MorphAnalyzer()

# Start from NLTK's Russian stop-word list and append the extra entries.
# NOTE(review): the six extra entries are empty strings in this copy - the
# intended words appear to be missing; confirm against the original source.
stopwords = nltk.corpus.stopwords.words('russian')
stopwords += ['', '', '', '', '', '']

def lemmatize(text):
    """Return the lemmatized, stop-word-free form of *text*.

    The text is lower-cased, digit runs are removed, and each
    whitespace-separated token is replaced by the first normal form reported
    by the module-level ``morph`` analyzer. Tokens found in the module-level
    ``stopwords`` list are dropped.

    Args:
        text: The string to lemmatize.

    Returns:
        The remaining lemmas joined by single spaces when more than two of
        them survive; otherwise ``None``.
    """
    text = re.sub(r"\d+", '', text.lower())  # drop digit runs before tokenizing

    tokens = []
    for token in text.split():
        # NOTE(review): both arguments of replace() are empty strings in this
        # copy, which makes the call a no-op; the intended characters appear
        # to be missing - confirm against the original source.
        lemma = morph.normal_forms(token)[0].replace('', '')
        if lemma and lemma not in stopwords:
            tokens.append(lemma)

    # Texts with two or fewer remaining lemmas are rejected.
    if len(tokens) > 2:
        return ' '.join(tokens)
    return None


'CRM+XX.XX.XXXX XXXXXXXXXXXXX *** \n : XX.XX.XXXX\n .. [] , , , . \n : \n: XX XX XXXXXX'	' '

, , -. OneHotEncoding TF-IDF. , , – . , , , « » « » , .. .

Universal Sentence Embedder, , . . .

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text

# Load the sentence-embedding model from its local directory.
model = hub.load(r'/UniverseSentenseEmbeddings/USEv3')
# Embed a sample string. The literal is delimited with plain ASCII quotes:
# typographic quotes are not valid string delimiters in Python.
# NOTE(review): the sample text is blank in this copy - confirm the intended input.
embedding = model('    ')

, 5 . , :

# Compare one query phrase against five candidate phrases: embed both lists,
# then print the cosine similarity between the query and each candidate.
input1 = [' ']
input2 = [' ', ' ', ' ', ' ', ' ']
emb1 = model(input1)
emb2 = model(input2)
# cosine_similarity yields one row per query; there is a single query here.
results_cosine = pairwise.cosine_similarity(emb1, emb2).tolist()[0]
for i, res in enumerate(results_cosine):
    line = '"{}" <> "{}", cos_sim={:.3f}'.format(input1[0], input2[i], res)
    print(line)

" " <> " ", cos_sim = 0.860
" " <> " ", cos_sim = 0.769
" " <> " ", cos_sim = 0.748
" " <> " ", cos_sim = 0.559
" " <> " ", cos_sim = 0.192

, , .

4 : DBSCAN, , kMeans MiniBatchKMeans. , .., , :

from sklearn.cluster import AgglomerativeClustering
num_clusters = 5
# Euclidean distance is the estimator's default, so the distance keyword is
# omitted: scikit-learn renamed `affinity` to `metric` and later removed
# `affinity`, and relying on the default keeps this call valid on either side
# of that rename. (Alternatives noted in the original: cosine, l1, l2, manhattan.)
agglo1 = AgglomerativeClustering(n_clusters=num_clusters)
# Time the clustering run with the %time line magic. `run_line_magic` replaces
# the deprecated `InteractiveShell.magic()` entry point; `answer` is still
# assigned in the interactive namespace.
get_ipython().run_line_magic('time', 'answer = agglo1.fit_predict(sent_embs)')

5 , , . – 10 :

# For every cluster label in the 'AGGLOM' column, collect the ten most
# frequent lemmas, keyed as "<method>_<cluster>".
cl = {}
for cluster, data in tqdm(report.groupby('AGGLOM'), desc=method):
    # Join all texts of the cluster and split them into individual words.
    arr = ' '.join(data[''].values).split()
    # Reduce every word to the normal form of its first parse.
    arr_morph = [morph.parse(word)[0].normal_form for word in arr]
    # Stop words are filtered before the character replacement is applied.
    kept = [x.replace('', '') for x in arr_morph if x not in stopwords]
    key = method + '_' + str(cluster)
    cl[key] = Counter(kept).most_common(10)

- :

	10
AGGLOM_0	[('', 1548), ('', 786), ('', 565), ('', 552), ('', 494), ('', 427), ('', 371), ('', 73), ('', 45), ('', 40)]
AGGLOM_1	[('', 2984), ('', 2627), ('', 2205), ('', 2144), ('', 1932), ('', 1931), ('', 1688), ('', 1653), ('', 1571), ('', 1460)]	, ,
AGGLOM_2	[('', 3807), ('', 540), ('', 443), ('', 370), ('', 351), ('', 312), ('', 290), ('', 275), ('', 272), ('', 264)]
AGGLOM_3	[('', 1100), ('', 683), ('', 660), ('', 440), ('', 428), ('', 329), ('', 315), ('', 303), ('', 292), ('', 287)]	, .
AGGLOM_4	[('', 459), ('', 459), ('', 386), ('', 383), ('', 266), ('', 196), ('', 184), ('', 142), ('', 5), ('', 5)]

, , .

, .

Cómo ayuda ML con la auditoría de calidad del servicio al cliente

More articles: