基于深度学习的音乐推荐系统（三）使用已训练的卷积神经网络提取语谱图特征并计算图像间相似度

释放双眼，戴上耳机，听听看~！

该模块包含几部分：

调用训练好的并且已经保存的CNN模型（仅四层卷积层部分）
逐个读取tfrecords文件中的元素，并送入已训练好的CNN中，给每个图片提取128个特征
每首歌包含11个图片，即11*128个特征，将每首歌的11*128个特征之间进行余弦相似度计算
逐个歌曲计算，返回每个歌曲的最相似的三首歌歌名，以列表的形式

调用训练好的并且已经保存的CNN模型（仅四层卷积层部分）
定义CNN模型的参数


1
2
3
4
5
6
# Graph inputs for the TensorFlow 1.x model (graph mode).

# Learning-rate variable; it is not read by any code in this listing.
lr = tf.Variable(0.001, dtype=tf.float32)

# Batch of single-channel 256x256 images.
x = tf.placeholder(tf.float32, [None, 256, 256, 1], name='x')

# One label value per image in the batch.
y_ = tf.placeholder(tf.float32, [None], name='y_')

# Dropout keep probability consumed by define_predict_y().
keep_prob = tf.placeholder(tf.float32)

5

6

CNN模型结构定义


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def weight_variable(shape, name):
    """Create a weight variable drawn from a truncated normal (stddev 0.1).

    Args:
        shape: Shape of the variable to create.
        name: Name given to the underlying ``tf.Variable``.

    Returns:
        The newly created ``tf.Variable``.
    """
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

4

5

def bias_variable(shape, name):
    """Create a bias variable whose entries all start at 0.1.

    Args:
        shape: Shape of the variable to create.
        name: Name given to the underlying ``tf.Variable``.

    Returns:
        The newly created ``tf.Variable``.
    """
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)

9

def conv2d(x, W):
    """Apply a stride-1, SAME-padded 2-D convolution of ``x`` with ``W``.

    Args:
        x: Input tensor passed to ``tf.nn.conv2d``.
        W: Filter tensor passed to ``tf.nn.conv2d``.

    Returns:
        The convolution output tensor.
    """
    # The name scope must be entered when the op is created (i.e. at call
    # time); wrapping the ``def`` statement in a scope has no effect.
    with tf.name_scope('conv2d'):
        # strides is [1, x_movement, y_movement, 1]; strides[0] and
        # strides[3] must be 1.
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

15

def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window and stride 2 (SAME padding)."""
    # Scope is entered at call time so the pooling op is actually named.
    with tf.name_scope('max_pool_2x2'):
        return tf.nn.max_pool(
            x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def max_pool_4x4(x):
    """Max-pool ``x`` with a 4x4 window and stride 4 (SAME padding)."""
    with tf.name_scope('max_pool_4x4'):
        return tf.nn.max_pool(
            x, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')

23

24

def define_predict_y(x):
    """Build the four-conv-layer feature extractor and its 128-unit FC layer.

    Each conv stage is a 3x3 stride-1 SAME convolution, an ELU and a max
    pool. With the 256x256x1 input declared for ``x`` the spatial size goes
    256 -> 128 (2x2 pool) -> 32 -> 8 -> 2 (4x4 pools) while the channel
    count goes 1 -> 64 -> 128 -> 256 -> 512, which is why the flattened
    size below is 2*2*512.

    Variable-scope and variable names ("conv1/W_conv1", ..., "fc1/b_fc1")
    are kept exactly as before so existing checkpoints still restore.

    Args:
        x: Input image batch tensor.

    Returns:
        The dropout-wrapped fc1 activations, 128 values per image. Dropout
        uses the module-level ``keep_prob`` placeholder.
    """
    # (stage number, input channels, output channels, pooling helper)
    conv_stages = [
        (1, 1, 64, max_pool_2x2),
        (2, 64, 128, max_pool_4x4),
        (3, 128, 256, max_pool_4x4),
        (4, 256, 512, max_pool_4x4),
    ]

    net = x
    for stage, in_channels, out_channels, pool in conv_stages:
        with tf.variable_scope("conv%d" % stage):
            weights = weight_variable(
                [3, 3, in_channels, out_channels], 'W_conv%d' % stage)
            biases = bias_variable([out_channels], 'b_conv%d' % stage)
            net = pool(tf.nn.elu(conv2d(net, weights) + biases))

    with tf.variable_scope("fc1"):
        W_fc1 = weight_variable([2 * 2 * 512, 128], 'W_fc1')
        b_fc1 = bias_variable([128], 'b_fc1')
        # [n_samples, 2, 2, 512] -> [n_samples, 2*2*512]
        flat = tf.reshape(net, [-1, 2 * 2 * 512])
        h_fc1 = tf.nn.elu(tf.matmul(flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # The classification layer (fc2) is intentionally not built: only the
    # 128-dimensional fc1 features are used.
    return h_fc1_drop

67

# Build the feature-extraction graph once on the shared input placeholder.
prediction = define_predict_y(x)

# Saver used to load the previously trained parameters into a session.
new_saver = tf.train.Saver()

71

72

载入已经保存的模型参数


1
2
3
1new_saver.restore(sess, tf.train.latest_checkpoint(&#x27;C:/Users/Administrator/Desktop/ckpt/&#x27;))

2       print(&quot;导入参数成功！&quot;)

3

逐个读取tfrecords文件中的元素，并送入已训练好的CNN中，给每个图片提取128个特征

1.逐个读取tfrecords文件中的元素


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def _parse_record(example_proto):
    """Parse one serialized ``tf.Example`` into a dict of scalar features.

    Args:
        example_proto: A serialized example, as yielded by a
            ``TFRecordDataset``.

    Returns:
        Dict with the keys ``encoded`` and ``fname`` (strings) and
        ``width``, ``height`` and ``label`` (int64).
    """
    features = {
        'encoded': tf.FixedLenFeature((), tf.string),
        'fname': tf.FixedLenFeature((), tf.string),
        'width': tf.FixedLenFeature((), tf.int64),
        'height': tf.FixedLenFeature((), tf.int64),
        'label': tf.FixedLenFeature((), tf.int64),
    }
    return tf.parse_single_example(example_proto, features=features)

10

11

12

13###1.....

# Every decoded image read so far, stored as [pixels, file name, label]
# and re-sorted by file name at the end of each read_test() call.
img_vec_list = []


def read_test(input_file):
    """Decode every record of one TFRecord file into ``img_vec_list``.

    Each record becomes ``[pixels, fname, label]`` where ``pixels`` is a
    float32 array of shape (256, 256, 1) scaled to [0, 1], ``fname`` is the
    decoded file name and ``label`` is an int32 scalar. After the file is
    exhausted the whole list is sorted by file name.

    Args:
        input_file: Path of the TFRecord file to read.
    """
    dataset = tf.data.TFRecordDataset(input_file)
    dataset = dataset.map(_parse_record)
    iterator = dataset.make_one_shot_iterator()
    # Create the get_next op exactly once. Calling iterator.get_next()
    # inside the loop adds a new op to the graph on every iteration, which
    # makes the graph (and memory use) grow with the number of records.
    next_record = iterator.get_next()

    with tf.Session() as sess:
        count = 0
        try:
            while True:
                features = sess.run(next_record)
                count += 1
                print(count)
                img_fname = features['fname'].decode()
                # Decode with NumPy (``np`` is the alias this file already
                # uses) instead of building decode/reshape/cast ops per
                # record; the resulting values are the same.
                pixels = np.frombuffer(features['encoded'], dtype=np.uint8)
                pixels = pixels.reshape(256, 256, 1).astype(np.float32)
                pixels = pixels / np.float32(255.0)  # normalise to [0, 1]
                label = np.int32(features['label'])

                one = [pixels, img_fname, label]
                print(one[1])
                img_vec_list.append(one)
        except tf.errors.OutOfRangeError:
            # Raised by sess.run once the dataset is exhausted.
            print("..")
        print("-------------", len(img_vec_list))
        img_vec_list.sort(key=lambda entry: entry[1])
        print("over..")

# Load the ten test/train shard pairs in the same order as before:
# test0, train0, test1, train1, ..., test9, train9.
for shard in range(10):
    for split in ('test', 'train'):
        read_test('F:/data/{}{}.tfrecords'.format(split, shard))

68

2.并送入已训练好的CNN中


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# One feature array per entry of img_vec_list, in the same order.
vector_list = []


def get_vector():
    """Run every image in ``img_vec_list`` through the restored CNN.

    Restores the latest checkpoint and appends the ``prediction`` output for
    each image to ``vector_list``. Each image is fed as a batch of one.

    Raises:
        ValueError: If no checkpoint is found in the checkpoint directory.
    """
    checkpoint_dir = 'C:/Users/Administrator/Desktop/ckpt/'
    with tf.Session() as sess:
        print("there..")
        # No initializer, Coordinator or queue runners are needed here:
        # images are fed through placeholders and the variables get their
        # values from the checkpoint restored below.
        checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if checkpoint is None:
            raise ValueError(
                "No checkpoint found in %r" % checkpoint_dir)
        new_saver.restore(sess, checkpoint)
        print("导入参数成功！")

        for entry in img_vec_list:
            # keep_prob must be 1.0 at inference time. With 0.5, dropout
            # randomly zeroes half of the features on every run, so the
            # extracted vectors (and the similarities computed from them)
            # would not be reproducible.
            vector = sess.run(
                prediction,
                feed_dict={x: np.expand_dims(entry[0], 0), keep_prob: 1.0})
            vector_list.append(vector)


get_vector()

25

每首歌包含11个图片，即11*128个特征，将每首歌的11*128个特征之间进行余弦相似度计算


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def cos_sim(vector_a, vector_b):
    """Return the cosine similarity of two vectors rescaled to [0, 1].

    The raw cosine in [-1, 1] is mapped with ``0.5 + 0.5 * cos`` so that
    1.0 means same direction, 0.5 orthogonal and 0.0 opposite.

    Args:
        vector_a: First vector; any array-like, flattened before use (so a
            (1, N) row works as well as a length-N sequence).
        vector_b: Second vector with the same number of elements.

    Returns:
        The rescaled similarity as a float. If either vector has zero norm
        the cosine is undefined; it is treated as 0, giving 0.5, instead of
        producing NaN.

    Raises:
        ValueError: If the two vectors have different numbers of elements.
    """
    # Plain 1-D float arrays avoid the discouraged np.matrix type and the
    # deprecated float() conversion of a 1x1 matrix.
    a = np.asarray(vector_a, dtype=np.float64).ravel()
    b = np.asarray(vector_b, dtype=np.float64).ravel()
    denom = float(np.linalg.norm(a) * np.linalg.norm(b))
    if denom == 0.0:
        return 0.5
    cos = float(np.dot(a, b)) / denom
    return 0.5 + 0.5 * cos

15

16##########3....

# One [song index, best-matching song index, similarity] row per image.
cos_list = []


def get_all_vec_cos():
    """Find, for every image vector, the most similar image of another song.

    Images are grouped into songs by position: image ``i`` belongs to song
    ``i // 11``. For each image the best ``cos_sim`` score against all
    images of *other* songs is located and a row
    ``[song, best_song, best_score]`` is appended to ``cos_list``.
    """
    images_per_song = 11
    # Iterate over the vectors that are actually indexed below, so the
    # bounds can never disagree with another list's length.
    total = len(vector_list)
    for i in range(total):
        song = i // images_per_song
        max_cos = 0
        # Fallback when no other song scores above 0: the song itself.
        # This keeps the column in song-index units (it used to fall back
        # to the image index).
        max_index = song
        for j in range(total):
            other_song = j // images_per_song
            if other_song == song:
                continue
            temp_cos = cos_sim(vector_list[i], vector_list[j])
            if temp_cos > max_cos:
                print("temp_cos:", temp_cos, "max_cos", max_cos)
                max_cos = temp_cos
                max_index = other_song
        cos_list.append([song, max_index, max_cos])
        # Print the row just appended; indexing with -1 stays correct even
        # if cos_list was not empty when this function was called.
        print("cos:", i, "  ", cos_list[-1])
    print("cos_list:", len(cos_list))


get_all_vec_cos()

38

39

逐个歌曲计算，返回每个歌曲的最相似的三首歌歌名，以列表的形式


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
1most_video = []

2

3#返回的是vidoe序号

4def get_most_video():

5   #将cos_list分割,每份11个

6   #cos_list = [cos_list[i:i+11] for i in range(0,len(cos_list),11)]

7   print(&quot;cos_list:&quot;,cos_list)

8   split_cos_list = []

9   for j in range(0,len(cos_list),11):

10      split_cos_list.append(cos_list[j:j+11])

11  print(&quot;split_cos_list:&quot;,split_cos_list)

12  for i in range(len(split_cos_list)):

13      index = []

14      for item in split_cos_list[i]:

15          index.append(item[1])

16      most_index = Counter(index).most_common(3)

17      most_video.append(most_index)

18  #print(&quot;most_video:&quot;,len(most_video))

19

20get_most_video()

21#print(most_video)

22

23

{{userData.name}}已认证

基于深度学习的音乐推荐系统（三）使用已训练的卷积神经网络提取语谱图特征并计算图像间相似度

调用训练好的并且已经保存的CNN模型（仅四层卷积层部分）

逐个读取tfrecords文件中的元素，并送入已训练好的CNN中，给每个图片提取128个特征

每首歌包含11个图片，即11*128个特征，将每首歌的11*128个特征之间进行余弦相似度计算

逐个歌曲计算，返回每个歌曲的最相似的三首歌歌名，以列表的形式

sysbench性能压测以及mysql性能压测

Ubuntu上NFS的安装配置

{{userData.name}}已认证

调用训练好的并且已经保存的CNN模型（仅四层卷积层部分）

逐个读取tfrecords文件中的元素，并送入已训练好的CNN中，给每个图片提取128个特征

每首歌包含11个图片，即11*128个特征，将每首歌的11*128个特征之间进行余弦相似度计算

逐个歌曲计算，返回每个歌曲的最相似的三首歌歌名，以列表的形式

Related posts:

sysbench性能压测以及mysql性能压测

Ubuntu上NFS的安装配置

Redis主从复制

Mysql安装、配置、优化

CentOS7安装MySQL

CentOS7安装Docker

每首歌包含11个图片，即11*128个特征，将每首歌的11*128个特征之间进行余弦相似度计算