Sentiment Analysis
This project is part of my university coursework. Here, I perform sentiment analysis, using word embeddings (Word2Vec) and a machine-learning classifier to predict whether a customer is satisfied with the product. I work with the Amazon Fine Food Reviews dataset, which provides attributes such as the review text, summary, score, and profile name. From these, I select only some attributes and split the data into a training set and a testing set. At the end, I report the accuracy of the model and discuss some challenges for improving it in the future.
- Import the data set
- Reshape and Explore Data
- Data Preprocessing
- Saving the file (df1.to_pickle("df1_clean.pkl"))
- Word Cloud
- Transform Text to Vector
- Cosine Similarity
- Heatmap
- Get a sample set
- Splitting into Train and Test Sets
- Word2Vec
- Core Process of Word2Vec
- Evaluate the model
- Confusion Matrix
#the dataset: Amazon Fine Food Reviews
import pandas as pd
import numpy as np
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import datetime
import nltk
# nltk.download('stopwords')   # needed once for the stopword list used below
# nltk.download('wordnet')     # needed once for the WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
df = pd.read_csv('Reviews.csv')
df.sample(100000) # preview a random sample of 100,000 rows (display only, not assigned)
Initially, we need to explore the landscape of our data and decide which attributes are essential. We can also visualize the data to understand it better.
print([col for col in df])
# Disclaimer: in this project, I use only one-fifth of the dataset
df.shape
df0 = df.sample(frac = 0.20) # take a 20% sample of the dataset
df0 = df0[['Id','ProfileName','Score', 'Time', 'Summary', 'Text']] # keep only selected columns
df0.head()
id = np.arange(0,df0.shape[0])
id.shape
df0['id'] = id # insert the newly created sequential id
df0.set_index("id", inplace = True) # set it as the index column
df0.pop('Id') # drop the original Id column
df0
df1 = df0[['Time', 'ProfileName', 'Summary', 'Text', 'Score']]
df1.head(20)
df1.shape
# Goal: visualize the proportion of reviews categorized by score
score_prop = df1.groupby('Score')['Text'].count()/len(df1.Score)*100
round(score_prop)
# declaring data
data = score_prop.to_list()
keys = ['Score 1', 'Score 2', 'Score 3', 'Score 4', 'Score 5']
# define Seaborn color palette to use
palette_color = sns.color_palette('RdBu')
# plotting data on chart
plt.pie(data, labels=keys, colors=palette_color, autopct='%.0f%%')
# displaying chart
plt.show()
# NOTE: The chart is dominated by reviews with Score 5, which could lead to an imbalanced classification problem.
# displaying the full text of reviews
with pd.option_context('display.max_colwidth', None):
    display(df1)
df1['Time'] = df1['Time'].apply(lambda x : datetime.datetime.fromtimestamp(x))
df1.head()
df1.info()
# Create Sentiment Class
# Score 1-3: not satisfied
# Score 4-5: satisfied
df1['Satisfied'] = pd.cut(df1['Score'], bins =[0,3, float('inf')], labels =['not satisfied', 'satisfied'])
df1.iloc[::1000]
ax = df1['Satisfied'].value_counts().plot(kind='bar',
                                          figsize=(8,8),
                                          title="Customer Sentiment Extracted from Amazon Fine Food Reviews")
ax.set_xlabel("Sentiment of Customer")
ax.set_ylabel("Frequency")
plt.show()
Now, we will perform some pre-processing on the data before converting it into vectors and passing it to the machine learning model.
Objective: to reduce noise, which affects the accuracy of the model's predictions, and to make the text simpler for the model to classify.
Method (a small worked example follows this list):
1) use a regular expression to remove any characters that are not letters of the alphabet
2) convert the strings to lowercase
3) remove stopwords such as 'the', 'an', 'to'; these are considered noise that can make the model less precise
4) lemmatization: map different forms of a word to a common base form, e.g. chips -> chip
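As a quick illustration of these four steps, here is a minimal sketch on a made-up sentence (assuming the NLTK stopword and WordNet data have already been downloaded); it is not part of the pipeline itself.
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

sample = "The chips were working well for parties, 100% recommended!"  # hypothetical sentence, not from the dataset
lm = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

step1 = re.sub('[^a-zA-Z]', ' ', sample)                    # 1) keep alphabetic characters only
step2 = step1.lower()                                       # 2) lowercase
step3 = [w for w in step2.split() if w not in stop_words]   # 3) drop English stopwords
step4 = [lm.lemmatize(w) for w in step3]                    # 4) lemmatize each remaining token
print(' '.join(step4))  # roughly: "chip working well party recommended"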
#object of WordNetLemmatizer
#processing time: around 40 min
lm = WordNetLemmatizer()
def text_transformation(df_col):
    stop_words = set(stopwords.words('english')) # build the stopword set once instead of rebuilding it for every word
    corpus = []
    for item in df_col:
        new_item = re.sub('[^a-zA-Z]',' ',str(item)) # replace any non-alphabetic character with whitespace
        new_item = new_item.lower() # convert everything to lowercase
        new_item = new_item.split() # split each string on whitespace into a list of words
        # lemmatize the words and keep only those that are not English stopwords
        new_item = [lm.lemmatize(word) for word in new_item if word not in stop_words]
        corpus.append(' '.join(str(x) for x in new_item))
    return corpus
corpus = text_transformation(df1['Text'])
df1['text_clean'] = corpus # store the cleaned text as a new column (used below)
# Note: after cleaning the text, some unwanted elements remain (e.g. <br> tags),
# so we use regular expressions to get rid of them
pattern0 = r'<br />'
clean = []
for i in df1.text_clean:
    a = re.sub(pattern0, ' ', i)
    clean.append(a)
pattern1 = r'<br>'
clean1 = []
for i in clean:
    b = re.sub(pattern1, ' ', i)
    clean1.append(b)
pattern2 = r'\s(br)\s'
clean2 = []
for i in clean1:
    c = re.sub(pattern2, ' ', i)
    clean2.append(c)
df1['text_clean'] = clean2 # put the fully cleaned text back into the data frame
df1.to_pickle("df1_clean.pkl") # save the cleaned data frame (it is loaded again below)
print(stopwords.words('english'))
df1 = pd.read_pickle("df1_clean.pkl") # reading pkl file
tmp =df1.iloc[::10000, [3, 6]]
with pd.option_context('display.max_colwidth', None):
    display(tmp)
# preparing data for the word cloud visualization: join every cleaned review into one long string
word = df1['text_clean']
comment_words = " ".join(word) # single string containing all reviews, separated by spaces
len(comment_words)
type(comment_words)
comment_words[0:1000]
wordcloud = WordCloud(width = 1500, height = 1500,background_color ='white',min_font_size = 10).generate(comment_words)
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud)
plt.title('High Frequency of Words Found in Customer Reviews')
#plt.savefig('wordcloud.png') # uncomment to save the figure as a PNG file
plt.show()
Next, we visualize the data with a heatmap. Assumption: reviews with different scores should occupy different positions in vector space, so we use a heatmap of pairwise cosine similarities to check whether reviews in different score ranges really are different in vector space.
A short note on what word embedding is
Word embeddings are texts converted into numbers, and the same text can have different numerical representations. In short, to build any machine learning or deep learning model, the input data ultimately has to be numerical, because models do not understand text or image data directly the way humans do. Vectorization, or word embedding, is therefore the process of converting text data into numerical vectors, and those vectors are then used to build machine learning models. In this way we extract features from text for natural language processing models. Broadly, word embeddings can be classified into two categories: frequency-based (statistical) word embeddings and prediction-based word embeddings.
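As a toy illustration of the frequency-based approach (the one used just below with CountVectorizer), two made-up reviews can be turned into count vectors like this; it is a sketch, not part of the pipeline.
from sklearn.feature_extraction.text import CountVectorizer

toy_corpus = ["great chip great flavor", "bad chip stale flavor"]  # hypothetical mini-reviews
vect = CountVectorizer()
toy_matrix = vect.fit_transform(toy_corpus)   # sparse document-term count matrix
print(vect.get_feature_names_out())           # vocabulary: ['bad' 'chip' 'flavor' 'great' 'stale']
print(toy_matrix.toarray())                   # [[0 1 1 2 0]
                                              #  [1 1 1 0 1]]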
Categorize the reviews into two groups: score 5, and score 3 or lower.
filter0 = df1['Score'] == 5
score_5 = df1[filter0]
# filter only the cleaned text of reviews with score < 4
filter1 = df1['Score'] < 4
score_1to3 = df1[filter1]
score_5 = score_5[['Time','ProfileName','text_clean','Score']].iloc[0:500] # keep the first 500 rows so both groups have the same number of rows
score_1to3 = score_1to3[['Time','ProfileName','text_clean','Score']].iloc[0:500] # keep the first 500 rows so both groups have the same number of rows
score_5
score_1to3
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
tf_score5 = count_vect.fit_transform(score_5['text_clean'])
tf_score5
tf_score1to3 = count_vect.fit_transform(score_1to3['text_clean'])
tf_score1to3
tf_score1to3.shape
After transforming the text into vectors this way, the two sparse matrices have different shapes (each CountVectorizer was fitted on its own group, so the vocabularies differ), and we need to align them to the same shape before passing them to the cosine_similarity function; fitting a single vectorizer on both groups would be a cleaner alternative. Cosine similarity is a way to measure how close two data points are in vector space, and in our case we compute it between reviews and visualize the result with a heatmap.
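For reference, the cosine similarity of two vectors a and b is their dot product divided by the product of their norms. A quick sanity check on two small hypothetical count vectors (a sketch, not part of the pipeline):
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

a = np.array([[0, 1, 1, 2, 0]])   # hypothetical count vector of one review
b = np.array([[1, 1, 1, 0, 1]])   # hypothetical count vector of another review
# dot(a, b) = 2, ||a|| = sqrt(6), ||b|| = 2, so the similarity is 2 / (2 * sqrt(6)) ≈ 0.41
print(cosine_similarity(a, b))    # [[0.408...]]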
tf_score5=tf_score5[0:500, 0:3511].toarray() # truncate to 500 x 3511 and convert the sparse matrix to a dense array
tf_score1to3=tf_score1to3[0:500, 0:3511].toarray() # truncate to 500 x 3511 and convert the sparse matrix to a dense array
tf_score1to3
from sklearn.metrics.pairwise import cosine_similarity
cosinescore = cosine_similarity(tf_score5 ,tf_score1to3)
cosinescore
plot_z = cosinescore[0:40, 0:40]
import seaborn as sns
df_todraw = pd.DataFrame(plot_z)
plt.subplots(figsize=(20, 15))
ax = sns.heatmap(df_todraw,
cmap="YlGnBu",
vmin=0, vmax=1, annot=True, fmt='.1f')
plt.show()
# Note: the heatmap shows little similarity between these two groups of reviews, which is what we expect,
# because their scores fall in clearly different ranges.
Compare score-5 reviews with other score-5 reviews.
cosinescore5 = cosine_similarity(tf_score5 ,tf_score5)
cosinescore5
plot_zz = cosinescore5[0:40, 41:81]
plot_x = list(range(41,81))
import seaborn as sns
df_todraw2 = pd.DataFrame(plot_zz, columns = plot_x)
plt.subplots(figsize=(20, 15))
ax = sns.heatmap(df_todraw2,
cmap="YlGnBu",
vmin=0, vmax=1, annot=True, fmt='.1f')
plt.show()
# Note: when comparing score-5 reviews with other score-5 reviews, the similarity values are noticeably higher.
Pull out some pairs of reviews with high cosine similarity and see how they are similar.
score_5.iloc[3,2]
score_5.iloc[60,2]
score_5.iloc[33,2]
score_5.iloc[78,2]
score_5.iloc[36,2]
score_5.iloc[37,2] # score 0.0, this review is about cereal
Result: the pairs with cosine similarity around 0.4 and 0.3 are all positive reviews about chips.
Showing the proportion of reviews categorized by the 'satisfied' and 'not satisfied' labels.
ax = df1['Satisfied'].value_counts().plot(kind='bar',
                                          figsize=(8,8),
                                          title="Customer Sentiment Extracted from Amazon Fine Food Reviews")
ax.set_xlabel("Sentiment of Customer")
ax.set_ylabel("Frequency")
plt.show()
Because the customer reviews contain significantly more 'satisfied' than 'not satisfied' examples, we have an imbalanced sentiment class, which could bias the model. Dealing with that issue in depth is out of scope for this report, so we simply take an equal number of samples from each group.
# goal: eliminate the class imbalance by taking an equal number of rows per class
def get_top_data(top_n = 20000):
    top_data_df_positive = df1[df1['Satisfied'] == 'satisfied'].head(top_n)
    top_data_df_negative = df1[df1['Satisfied'] == 'not satisfied'].head(top_n)
    top_data_df_small = pd.concat([top_data_df_positive, top_data_df_negative])
    return top_data_df_small
df2 = get_top_data(top_n=20000)
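Note that get_top_data takes the first top_n rows of each class rather than a random sample. A random per-class draw, as described above, could look like the sketch below (the function name is hypothetical, and it assumes each class has at least n_per_class rows).
# sketch of random per-class sampling instead of taking the first rows
def get_balanced_sample(df, n_per_class=20000, seed=15):
    pos = df[df['Satisfied'] == 'satisfied'].sample(n=n_per_class, random_state=seed)
    neg = df[df['Satisfied'] == 'not satisfied'].sample(n=n_per_class, random_state=seed)
    return pd.concat([pos, neg])
# df2 = get_balanced_sample(df1)  # would replace the get_top_data call above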
ax = df2['Satisfied'].value_counts().plot(kind='bar',
                                          figsize=(8,8),
                                          title="Customer Sentiment Extracted from Amazon Fine Food Reviews")
ax.set_xlabel("Sentiment of Customer")
ax.set_ylabel("Frequency")
plt.show()
# the class-imbalance problem is gone
# separate the text into individual words (tokens); this helps when transforming text into numeric values
from gensim.utils import simple_preprocess
# Tokenize the text column to get the new column 'tokenized_text'
df2['tokenized_text'] = [simple_preprocess(line, deacc=True) for line in df2['text_clean']]
print(df2['tokenized_text'].head(10))
[col for col in df2]
from gensim.parsing.porter import PorterStemmer
porter_stemmer = PorterStemmer()
# Get the stemmed_tokens
df2['stemmed_tokens'] = [[porter_stemmer.stem(word) for word in tokens] for tokens in df2['tokenized_text'] ]
df2['stemmed_tokens'].head(10)
tmp = df2.iloc[::2000, [6, 7]]
with pd.option_context('display.max_colwidth', None):
    display(tmp)
df2
from sklearn.model_selection import train_test_split
# Train Test Split Function
def split_train_test(df2, test_size=0.3, shuffle_state=True):
    X_train, X_test, Y_train, Y_test = train_test_split(df2[['stemmed_tokens']],
                                                        df2['Satisfied'],
                                                        shuffle=shuffle_state,
                                                        test_size=test_size,
                                                        random_state=15)
    print("Value counts for Train sentiment")
    print(Y_train.value_counts())
    print('\n')
    print("Value counts for Test sentiments")
    print(Y_test.value_counts())
    print('\n')
    print(type(X_train))
    print(type(Y_train))
    print('\n')
    X_train = X_train.reset_index()
    X_test = X_test.reset_index()
    Y_train = Y_train.to_frame()
    Y_train = Y_train.reset_index()
    Y_test = Y_test.to_frame()
    Y_test = Y_test.reset_index()
    print(X_train.head())
    return X_train, X_test, Y_train, Y_test
X_train, X_test, Y_train, Y_test = split_train_test(df2)
X_train: the training part of the features (X)
X_test: the test part of the features (X)
Y_train: the training part of the labels (Y)
Y_test: the test part of the labels (Y)
More detail on splitting training and testing sets: https://realpython.com/train-test-split-python-data/#:~:text=x_train%20%3A%20The%20training%20part%20of,of%20the%20second%20sequence%20(%20y%20)
X_train
X_test
Y_train
Y_test
Feature Extraction: we will train a Word2Vec model on our corpus and use its word vectors as features for the classifier.
from gensim.models import Word2Vec
import time
# Skip-gram model (sg = 1)
vector_size=1000
window = 5
min_count = 1
workers = 3
sg = 1
word2vec_model_file = 'word2vec_' + str(vector_size) + '.model'
start_time = time.time()
stemmed_tokens = pd.Series(df2['stemmed_tokens']).values
# Train the Word2Vec Model
w2v_model = Word2Vec(stemmed_tokens, min_count = min_count,vector_size=vector_size ,workers = workers, window = window, sg = sg)
print("Time taken to train word2vec model: " + str(time.time() - start_time))
# Because this process can also take a long time, save the trained model to 'word2vec_model_file'
w2v_model.save(word2vec_model_file)
# the trained model can now capture relationships between words
# Load the model from the model file
w2v_model = Word2Vec.load(word2vec_model_file)
# Most Similar word
print(w2v_model.wv.most_similar('well'))
# The model now represents words with similar meanings by similar vectors
w2v_model.wv.similarity('good', 'worthwhil')
w2v_model.wv.doesnt_match(['good', 'charm', 'amazingli','bad','well'])
w2v_model.wv.similarity('bad', 'bitter')
w2v_model.wv.similarity('bad', 'good') # need to fix this
w2v_model.wv.most_similar(positive="bad")
w2v_model.wv.most_similar(positive="chip")
From here on, we work with the training set to fit the classifier before making predictions. We loop through X_train and X_test, which were split earlier, take the mean of the word vectors in each review, and use that mean vector as a representative of the review's overall tone.
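As a compact illustration of that averaging step (a sketch, assuming the trained w2v_model and the X_train frame from above), the document vector of a single review is just the element-wise mean of its word vectors:
import numpy as np

tokens = X_train.loc[0, 'stemmed_tokens']                         # stemmed tokens of the first training review
vectors = [w2v_model.wv[t] for t in tokens if t in w2v_model.wv]  # look up each token's learned vector
doc_vector = np.mean(vectors, axis=0) if vectors else np.zeros(w2v_model.vector_size)  # mean vector, or zeros for an empty review
print(doc_vector.shape)                                           # (1000,), matching vector_size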
# find the mean of the word vectors in each review and use it as a representative of that review's tone,
# then write the resulting vectors to a CSV file
word2vec_filename = 'train_review_word2vec.csv'
with open(word2vec_filename, 'w+') as word2vec_file:
    for index, row in X_train.iterrows():
        model_vector = (np.mean([w2v_model.wv[token] for token in row['stemmed_tokens']], axis=0)).tolist()
        if index == 0:
            header = ",".join(str(ele) for ele in range(1000))
            word2vec_file.write(header)
            word2vec_file.write("\n")
        # if the mean is a valid vector, write it; otherwise write a vector of zeros
        if type(model_vector) is list:
            line1 = ",".join([str(vector_element) for vector_element in model_vector])
        else:
            line1 = ",".join([str(0) for i in range(1000)])
        word2vec_file.write(line1)
        word2vec_file.write('\n')
# do the same for the test set: compute the mean vector of each review and write it to a CSV file
word2vec_filename = 'test_review_word2vec.csv'
with open(word2vec_filename, 'w+') as word2vec_file:
    for index, row in X_test.iterrows(): # iterrows() loops over each review so we can compute its mean vector
        model_vector = (np.mean([w2v_model.wv[token] for token in row['stemmed_tokens']], axis=0)).tolist()
        if index == 0:
            header = ",".join(str(ele) for ele in range(1000))
            word2vec_file.write(header)
            word2vec_file.write("\n")
        # if the mean is a valid vector, write it; otherwise write a vector of zeros
        if type(model_vector) is list:
            line1 = ",".join([str(vector_element) for vector_element in model_vector])
        else:
            line1 = ",".join([str(0) for i in range(1000)])
        word2vec_file.write(line1)
        word2vec_file.write('\n')
import time
#import RandomForestClassifier, this is the algorithm that will be used for classification
from sklearn.ensemble import RandomForestClassifier
# Load from the filename
trainvec = pd.read_csv('train_review_word2vec.csv') # training
testvec = pd.read_csv('test_review_word2vec.csv') # testing
#Initialize the model
forest_word2vec = RandomForestClassifier(n_estimators = 100)
start_time = time.time()
# Fit the model
forest_word2vec.fit(trainvec, Y_train['Satisfied']) # fit the random forest on the training vectors and labels
print("Time taken to fit the model with word2vec vectors: " + str(time.time() - start_time))
# the prediction for each review in the test set is either 'satisfied' or 'not satisfied'
result = forest_word2vec.predict(testvec)
result.shape
result[::10]
Y_test['Predict'] = result
Y_test['review'] = X_test['stemmed_tokens']
Y_test[::500]
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(Y_test['Satisfied'],result, zero_division=0))
cf_matrix = confusion_matrix(Y_test['Satisfied'], result)
cf_matrix
import seaborn as sns
ax = sns.heatmap(cf_matrix, annot=True, cmap='YlGn', fmt='.1f')
ax.set_title('Seaborn Confusion Matrix with labels\n\n');
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ');
## Tick labels - the list must be in alphabetical order, matching the class order used by confusion_matrix
ax.xaxis.set_ticklabels(['not satisfied','satisfied'])
ax.yaxis.set_ticklabels(['not satisfied','satisfied'])
## Display the visualization of the Confusion Matrix.
plt.show()
Special thanks to https://medium.com/swlh/sentiment-classification-using-word-embeddings-word2vec-aedf28fbb8ca and all the related helpful posts on Stack Overflow.