{"id":7032,"date":"2024-07-20T15:01:03","date_gmt":"2024-07-20T07:01:03","guid":{"rendered":""},"modified":"2024-07-20T15:01:03","modified_gmt":"2024-07-20T07:01:03","slug":"\u6587\u672c\u8f6c\u5316\u4e3a\u8bcd\u5411\u91cf_\u5411\u91cf\u600e\u4e48\u7b97","status":"publish","type":"post","link":"https:\/\/mushiming.com\/7032.html","title":{"rendered":"\u6587\u672c\u8f6c\u5316\u4e3a\u8bcd\u5411\u91cf_\u5411\u91cf\u600e\u4e48\u7b97"},"content":{"rendered":"

\n <\/path> \n<\/svg> <\/p>\n

\u8bcd\u5411\u91cf\u8f6c\u6362\u56de\u6587\u672c-\u4ee5CMU-MOSI\u4e3a\u4f8b<\/h3>\n

\u9700\u6c42\uff1a\u5728\u8fdb\u884c\u6df1\u5ea6\u5b66\u4e60\u7684\u65f6\u5019\uff0c\u6211\u4eec\u4f7f\u7528glove\u5c06\u6587\u672c\u8f6c\u6362\u4e3a\u8bcd\u5411\u91cf\uff0c\u4f7f\u5f97\u795e\u7ecf\u7f51\u7edc\u53ef\u4ee5\u66f4\u597d\u5730\u7406\u89e3\u6587\u672c\u3002\u4f46\u662f\uff0c\u5f53\u6211\u4eec\u5f97\u5230\u4e00\u4e2a\u5df2\u7ecf\u7ecf\u8fc7\u9884\u5904\u7406\u7684\u6570\u636e\u96c6\u65f6\uff0c\u6211\u4eec\u60f3\u8981\u9006\u63a8\u5bfc\u5f97\u5230\u8be5\u5411\u91cf\u7684\u6587\u672c<\/mark>\u3002
\u8fd9\u4e2a\u65f6\u5019\u6211\u4eec\u5e94\u8be5\u600e\u4e48\u529e\uff1f\u6211\u4eec\u8fd9\u91cc\u4ee5\u591a\u6a21\u6001\u60c5\u611f\u5206\u6790\u6570\u636e\u96c6CMU-MOSI\u4e3a\u4f8b\u8fdb\u884c\u4ecb\u7ecd\u3002<\/p>\n

\u6b65\u9aa4<\/h4>\n
    \n
  1. \u4e0b\u8f7d\u6240\u4f7f\u7528\u7684embedding\u7248\u672c\u3002\u6211\u8fd9\u91cc\u662fglove.840B.300d\u3002\u7531\u4e8e\u53ef\u80fd\u9700\u8981\u7ffb\u5899\uff0c\u8fd9\u91cc\u63d0\u4f9b\u4e00\u4e0b\u767e\u5ea6\u4e91\u76d8\u94fe\u63a5\uff0c\u6709\u9700\u8981\u7684\u53ef\u4ee5\u81ea\u53d6<\/li>\n<\/ol>\n
    \u94fe\u63a5\uff1ahttps:\/\/pan.baidu.com\/s\/1SFK93RpL5YJfEmV2jgWDIw \u63d0\u53d6\u7801\uff1aop2g \u590d\u5236\u8fd9\u6bb5\u5185\u5bb9\u540e\u6253\u5f00\u767e\u5ea6\u7f51\u76d8\u624b\u673aApp\uff0c\u64cd\u4f5c\u66f4\u65b9\u4fbf\u54e6 <\/code><\/pre>\n
      \n
    2. \u4f7f\u7528\u4e0b\u8f7d\u7684glove\uff0c\u6784\u5efa\u4e24\u4e2a\u5728\u8f6c\u6362\u4e2d\u9700\u8981\u7684\u6570\u636e\u7ed3\u6784\uff0c\u5206\u522b\u662fid2word\u7684\u5b57\u5178\u4ee5\u53caembedding\u7684\u5217\u8868<\/li>\n<\/ol>\n
      def<\/span> contruct_twoFile<\/span>(<\/span>)<\/span>:<\/span> id2word =<\/span> { \n   <\/span>}<\/span> embedding =<\/span> [<\/span>]<\/span> with<\/span> open<\/span>(<\/span>r'F:\\pcode\\dataset\\glove.840B.300d.txt'<\/span>,<\/span> 'r'<\/span>,<\/span> encoding=<\/span>'utf-8'<\/span>)<\/span>as<\/span> f:<\/span> row =<\/span> 0<\/span> for<\/span> line in<\/span> f.<\/span>readlines(<\/span>)<\/span>:<\/span> if<\/span> row ==<\/span> 0<\/span>:<\/span> row =<\/span> 1<\/span> continue<\/span> row +=<\/span> 1<\/span> if<\/span> row %<\/span> 10000<\/span> ==<\/span> 0<\/span>:<\/span> print<\/span>(<\/span>row)<\/span> word =<\/span> line.<\/span>strip(<\/span>)<\/span>.<\/span>split(<\/span>' '<\/span>)<\/span>[<\/span>0<\/span>]<\/span> try<\/span>:<\/span> vec =<\/span> np.<\/span>array(<\/span>[<\/span>float<\/span>(<\/span>item)<\/span> for<\/span> item in<\/span> line.<\/span>strip(<\/span>)<\/span>.<\/span>split(<\/span>)<\/span>[<\/span>1<\/span>:<\/span>]<\/span>]<\/span>)<\/span> except<\/span>:<\/span> continue<\/span> # print()<\/span> if<\/span> (<\/span>vec.<\/span>shape[<\/span>0<\/span>]<\/span> ==<\/span> 300<\/span>)<\/span>:<\/span> id2word[<\/span>len<\/span>(<\/span>embedding)<\/span>]<\/span> =<\/span> word embedding.<\/span>append(<\/span>vec)<\/span> # exit(0)<\/span> glove_embedding =<\/span> np.<\/span>array(<\/span>embedding)<\/span> <\/code><\/pre>\n
        \n
      3. \u8f7d\u5165MOSI\u6570\u636e\u96c6\u4e2d\u6587\u672c\u7684\u90e8\u5206<\/li>\n<\/ol>\n
        dataset =<\/span> pickle.<\/span>load(<\/span>open<\/span>(<\/span>path,<\/span> 'rb'<\/span>)<\/span>)<\/span> train_text_embed =<\/span> torch.<\/span>tensor(<\/span>dataset[<\/span>'train'<\/span>]<\/span>[<\/span>'text'<\/span>]<\/span>.<\/span>astype(<\/span>np.<\/span>float32)<\/span>)<\/span>.<\/span>cpu(<\/span>)<\/span>.<\/span>detach(<\/span>)<\/span> <\/code><\/pre>\n
          \n
        4. \u7136\u540e\u5c31\u53ef\u4ee5\u6839\u636e\u5982\u4e0b\u4ee3\u7801\u8fdb\u884c\u8f6c\u6362\u4e86<\/li>\n<\/ol>\n
          total_text_context =<\/span> [<\/span>]<\/span> for<\/span> x in<\/span> train_text_embed:<\/span> text =<\/span> [<\/span>]<\/span> # 50, 300<\/span> for<\/span> xx in<\/span> x:<\/span> # \u5728\u8fd950\u4e2a\u91cc\u9762\u627e<\/span> dataset_embed =<\/span> xx[<\/span>:<\/span>300<\/span>]<\/span> if<\/span> np.<\/span>sum<\/span>(<\/span>dataset_embed)<\/span> !=<\/span> 0<\/span>:<\/span> dataset_embed =<\/span> np.<\/span>reshape(<\/span>dataset_embed,<\/span> [<\/span>1<\/span>,<\/span> 300<\/span>]<\/span>)<\/span> scores =<\/span> np.<\/span>sum<\/span>(<\/span>np.<\/span>abs<\/span>(<\/span>dataset_embed -<\/span> glove_embedding)<\/span>,<\/span> axis=<\/span>-<\/span>1<\/span>)<\/span> id<\/span> =<\/span> scores.<\/span>argmin(<\/span>)<\/span> score =<\/span> scores[<\/span>id<\/span>]<\/span> text.<\/span>append(<\/span>id2word[<\/span>id<\/span>]<\/span>)<\/span> if<\/span> score <<\/span> 0.1<\/span>:<\/span> print<\/span>(<\/span>id2word[<\/span>id<\/span>]<\/span> +<\/span> ':'<\/span> +<\/span> str<\/span>(<\/span>score)<\/span>)<\/span> text.<\/span>append(<\/span>id2word[<\/span>id<\/span>]<\/span>)<\/span> total_text_context.<\/span>append(<\/span>text)<\/span> <\/code><\/pre>\n
            \n
          5. \u7ed3\u679c\u5982\u56fe\u6240\u793a
            \"\u6587\u672c\u8f6c\u5316\u4e3a\u8bcd\u5411\u91cf_\u5411\u91cf\u600e\u4e48\u7b97<\/li>\n<\/ol>\n","protected":false},"excerpt":{"rendered":"\u6587\u672c\u8f6c\u5316\u4e3a\u8bcd\u5411\u91cf_\u5411\u91cf\u600e\u4e48\u7b97\u8bcd\u5411\u91cf\u8f6c\u6362\u56de\u6587\u672c-\u4ee5CMU-MOSI\u4e3a\u4f8b\u9700\u6c42\uff1a\u5728\u8fdb\u884c\u6df1\u5ea6\u5b66\u4e60\u7684\u65f6\u5019\uff0c\u6211\u4eec\u4f7f\u7528glove\u5c06\u6587\u672c\u8f6c\u6362\u4e3a\u8bcd\u5411\u91cf...","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[],"tags":[],"_links":{"self":[{"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/posts\/7032"}],"collection":[{"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/comments?post=7032"}],"version-history":[{"count":0,"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/posts\/7032\/revisions"}],"wp:attachment":[{"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/media?parent=7032"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/categories?post=7032"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/mushiming.com\/wp-json\/wp\/v2\/tags?post=7032"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}