{"id":195,"date":"2024-04-14T15:36:02","date_gmt":"2024-04-14T06:36:02","guid":{"rendered":"https:\/\/chocottopro.com\/?p=195"},"modified":"2024-04-26T11:09:18","modified_gmt":"2024-04-26T02:09:18","slug":"%e3%80%90%e5%88%9d%e5%bf%83%e8%80%85%e5%90%91%e3%81%91%e3%80%91%e3%82%b5%e3%83%b3%e3%83%97%e3%83%ab%e3%82%b3%e3%83%bc%e3%83%89%e3%81%a7%e5%be%b9%e5%ba%95%e8%a7%a3%e8%aa%ac%ef%bc%81%e6%9c%80%e6%96%b0","status":"publish","type":"post","link":"https:\/\/chocottopro.com\/?p=195","title":{"rendered":"\u3010\u521d\u5fc3\u8005\u5411\u3051\u3011\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3067\u5fb9\u5e95\u89e3\u8aac\uff01\u6700\u65b0\u7248spacy\u306b\u3088\u308b\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u5165\u9580"},"content":{"rendered":"\n<p>\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30easpacy\u306f\u3001Python\u3067\u9ad8\u901f\u304b\u3064\u4f7f\u3044\u3084\u3059\u3044\u8a00\u8a9e\u51e6\u7406\u3092\u5b9f\u73fe\u3057\u307e\u3059\u3002\u672c\u8a18\u4e8b\u3067\u306f\u3001spacy\u306e\u6982\u8981\u304b\u3089\u5b9f\u8df5\u7684\u306a\u6d3b\u7528\u6cd5\u307e\u3067\u3001\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3092\u4ea4\u3048\u306a\u304c\u3089\u8a73\u3057\u304f\u89e3\u8aac\u3057\u307e\u3059\u3002\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u53ef\u80fd\u6027\u3092\u5e83\u3052\u308bspacy\u306e\u9b45\u529b\u3092\u3001\u305c\u3072\u4f53\u611f\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<div class=\"wp-block-sgb-block-simple sgb-box-simple sgb-box-simple--title-normal sgb-box-simple--with-border\"><div style=\"background-color:var(--wp--preset--color--sango-main);color:#FFF\" class=\"sgb-box-simple__title\">\u3053\u306e\u8a18\u4e8b\u3092\u8aad\u3093\u3060\u3089\u308f\u304b\u308b\u3053\u3068<\/div><div class=\"sgb-box-simple__body\" style=\"border-color:var(--wp--preset--color--sango-main);background-color:#FFF\">\n<ul 
class=\"wp-block-list\">\n<li>spacy\u306e\u7279\u5fb4\u3068\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u3057\u3066\u306e\u4f4d\u7f6e\u3065\u3051<\/li>\n\n\n\n<li>spacy\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u65b9\u6cd5\u3068\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9<\/li>\n\n\n\n<li>\u65e5\u672c\u8a9e\u30c6\u30ad\u30b9\u30c8\u306e\u524d\u51e6\u7406\u3068\u8a00\u8a9e\u89e3\u6790\u306e\u6d41\u308c<\/li>\n\n\n\n<li>\u30c6\u30ad\u30b9\u30c8\u5206\u985e\u3001\u611f\u60c5\u5206\u6790\u3001\u30ad\u30fc\u30ef\u30fc\u30c9\u62bd\u51fa\u306a\u3069\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9<\/li>\n\n\n\n<li>\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\uff08BERT\u3001GPT\u306a\u3069\uff09\u3068spacy\u3092\u7d44\u307f\u5408\u308f\u305b\u305f\u9ad8\u5ea6\u306a\u6d3b\u7528\u6cd5<\/li>\n<\/ul>\n<\/div><\/div>\n\n\n\n<div class=\"toc\">    <div id=\"toc_container\" class=\"sgb-toc--bullets js-smooth-scroll\" data-dialog-title=\"Table of Contents\">\n      <p class=\"toc_title\">\u76ee\u6b21 <\/p>\n      <ul class=\"toc_list\">  <li class=\"first\">    <a href=\"#i-0\">spacy\u3068\u306f\u4f55\u304b\uff1f\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u6982\u8981\u3068\u7279\u5fb4<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">       
 <a href=\"#i-1\">spacy\u306e\u4f4d\u7f6e\u3065\u3051 \u2013 \u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u4e2d\u3067\u306e\u7acb\u3061\u4f4d\u7f6e<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-2\">spacy\u306e\u7279\u9577 \u2013 \u9ad8\u901f\u6027\u3001\u8a2d\u8a08\u306e\u67d4\u8edf\u6027\u3001\u5145\u5b9f\u3057\u305f\u30e2\u30c7\u30eb\u7b49<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-3\">spacy\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3068\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-4\">\u958b\u767a\u74b0\u5883\u306e\u6e96\u5099 \u2013 Python\u3068Pip\u306e\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7<\/a>      <\/li>      <li>        <a href=\"#i-5\">spacy\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb \u2013 Pip\u30b3\u30de\u30f3\u30c9\u306b\u3088\u308b\u5c0e\u5165\u65b9\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-6\">Language Model\u306e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9 \u2013 \u5229\u7528\u53ef\u80fd\u306a\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u7a2e\u985e\u3068\u5165\u624b\u6cd5<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-7\">spacy\u306e\u57fa\u672c\u7684\u306a\u30ef\u30fc\u30af\u30d5\u30ed\u30fc \u2013 \u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u69cb\u6210\u8981\u7d20\u3068\u51e6\u7406\u306e\u6d41\u308c<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-8\">\u65e5\u672c\u8a9e\u30c6\u30ad\u30b9\u30c8\u306e\u524d\u51e6\u7406\u3068\u8a00\u8a9e\u89e3\u6790\u306e\u57fa\u672c<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-9\">Tokenizer\u306b\u3088\u308b\u5358\u8a9e\u5206\u5272 \u2013 \u65e5\u672c\u8a9e\u7279\u6709\u306e\u554f\u984c\u3068\u5bfe\u51e6\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-10\">\u54c1\u8a5e\u30bf\u30b0\u4ed8\u3051\u3068\u30ec\u30f3\u30de\u5316 \u2013 \u5358\u8a9e\u306e\u6a19\u6e96\u5f62\u30fb\u898b\u51fa\u3057\u8a9e\u3078\u306e\u5909\u63db<\/a>    
  <\/li>      <li>        <a href=\"#i-11\">\u69cb\u6587\u89e3\u6790 \u2013 \u5358\u8a9e\u306e\u4fc2\u308a\u53d7\u3051\u95a2\u4fc2\u306e\u89e3\u6790<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-12\">\u56fa\u6709\u8868\u73fe\u62bd\u51fa \u2013 \u4eba\u540d\u3001\u5730\u540d\u3001\u7d44\u7e54\u540d\u7b49\u306e\u62bd\u51fa\u65b9\u6cd5<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-13\">spacy\u306b\u3088\u308b\u5b9f\u8df5\u7684\u306a\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-14\">\u30c6\u30ad\u30b9\u30c8\u5206\u985e \u2013 \u30cb\u30e5\u30fc\u30b9\u8a18\u4e8b\u3084\u30ec\u30d3\u30e5\u30fc\u306e\u81ea\u52d5\u30ab\u30c6\u30b4\u30ea\u5206\u985e<\/a>      <\/li>      <li>        <a href=\"#i-15\">\u611f\u60c5\u5206\u6790 \u2013 SNS\u306e\u6295\u7a3f\u306a\u3069\u304b\u3089\u30e6\u30fc\u30b6\u30fc\u306e\u611f\u60c5\u3092\u5224\u5b9a\u3059\u308b<\/a>      <\/li>      <li>        <a href=\"#i-16\">\u30ad\u30fc\u30ef\u30fc\u30c9\u62bd\u51fa \u2013 \u6587\u66f8\u306e\u4e3b\u984c\u3092\u8868\u3059\u91cd\u8981\u8a9e\u53e5\u306e\u540c\u5b9a<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-17\">\u985e\u4f3c\u5ea6\u5224\u5b9a \u2013 \u6587\u7ae0\u540c\u58eb\u306e\u610f\u5473\u7684\u306a\u8fd1\u3055\u3092\u6e2c\u308b<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-18\">\u767a\u5c55\u7684\u306a\u8a71\u984c \u2013 \u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u3068spacy\u306e\u9023\u643a<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-19\">BERT\u3001GPT\u7b49\u306e\u4e8b\u524d\u5b66\u7fd2\u6e08\u307f\u30e2\u30c7\u30eb\u306e\u6d3b\u7528\u6cd5<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-20\">Transformers\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u306e\u9023\u643a\u306b\u3088\u308b\u30e2\u30c7\u30eb\u306e\u9ad8\u5ea6\u5316<\/a>      <\/li>    <\/ul>  <\/li>  <li 
class=\"last\">    <a href=\"#i-21\">\u307e\u3068\u3081 \u2013 \u6700\u65b0\u306espacy\u3067\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u53ef\u80fd\u6027\u3092\u5e83\u3052\u3088\u3046<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-22\">spacy\u304c\u3082\u305f\u3089\u3059\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u6c11\u4e3b\u5316<\/a>      <\/li>      <li>        <a href=\"#i-23\">\u5b9f\u52d9\u3084\u7814\u7a76\u3078\u306espacy\u306e\u6d3b\u7528\u30a2\u30a4\u30c7\u30a2<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-24\">spacy\u306e\u4eca\u5f8c\u306e\u767a\u5c55\u3078\u306e\u671f\u5f85<\/a>      <\/li>    <\/ul>  <\/li><\/ul>\n      \n    <\/div><\/div><h2 class=\"wp-block-heading\" id=\"i-0\">spacy\u3068\u306f\u4f55\u304b\uff1f\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u6982\u8981\u3068\u7279\u5fb4<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-1\">spacy\u306e\u4f4d\u7f6e\u3065\u3051 \u2013 \u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u4e2d\u3067\u306e\u7acb\u3061\u4f4d\u7f6e<\/h3>\n\n\n\n<p>spacy\u306f\u3001Python\u3067\u5b9f\u88c5\u3055\u308c\u305f\u6700\u5148\u7aef\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3059\u3002\u4ed6\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u6bd4\u3079\u308b\u3068\u3001\u51e6\u7406\u901f\u5ea6\u3068\u4f7f\u3044\u3084\u3059\u3055\u306b\u91cd\u70b9\u3092\u7f6e\u3044\u3066\u3044\u308b\u306e\u304c\u7279\u5fb4\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u9ad8\u901f\u6027\u3092\u5b9f\u73fe\u3059\u308b\u305f\u3081\u306b\u3001spacy\u306fCython\uff08C\u3068Python\u306e\u30cf\u30a4\u30d6\u30ea\u30c3\u30c9\u8a00\u8a9e\uff09\u3092\u30d9\u30fc\u30b9\u306b\u958b\u767a\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u5927\u898f\u6a21\u306a\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u3092\u6271\u3046\u969b\u306b\u3001\u305d\u306e\u771f\u4fa1\u3092\u767a\u63ee\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001spacy\u306f
\u6a5f\u68b0\u5b66\u7fd2\u3092\u7528\u3044\u305f\u9ad8\u5ea6\u306a\u8a00\u8a9e\u89e3\u6790\u3092\u3001\u6bd4\u8f03\u7684\u5c11\u306a\u3044\u30b3\u30fc\u30c9\u91cf\u3067\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002\u305d\u306e\u305f\u3081\u3001\u7814\u7a76\u8005\u3060\u3051\u3067\u306a\u304f\u3001\u5b9f\u52d9\u3067\u8a00\u8a9e\u30c7\u30fc\u30bf\u3092\u6271\u3046\u30a8\u30f3\u30b8\u30cb\u30a2\u3084\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30c6\u30a3\u30b9\u30c8\u306b\u3082\u9069\u3057\u3066\u3044\u308b\u3068\u3044\u3048\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-2\">spacy\u306e\u7279\u9577 \u2013 \u9ad8\u901f\u6027\u3001\u8a2d\u8a08\u306e\u67d4\u8edf\u6027\u3001\u5145\u5b9f\u3057\u305f\u30e2\u30c7\u30eb\u7b49<\/h3>\n\n\n\n<p>spacy\u306e\u5927\u304d\u306a\u7279\u9577\u306f\u3001\u30e2\u30b8\u30e5\u30fc\u30eb\u5316\u3055\u308c\u305f\u8a2d\u8a08\u306b\u3042\u308a\u307e\u3059\u3002\u5fc5\u8981\u306a\u6a5f\u80fd\u3060\u3051\u3092\u9078\u629e\u3057\u3066\u4f7f\u7528\u3067\u304d\u308b\u305f\u3081\u3001\u7121\u99c4\u306a\u30aa\u30fc\u30d0\u30fc\u30d8\u30c3\u30c9\u3092\u524a\u6e1b\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001\u30ab\u30b9\u30bf\u30e0\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3092\u8ffd\u52a0\u3059\u308b\u3053\u3068\u3082\u5bb9\u6613\u3067\u3001\u958b\u767a\u8005\u306e\u81ea\u7531\u5ea6\u304c\u9ad8\u3044\u306e\u3082\u9b45\u529b\u306e\u4e00\u3064\u3067\u3059\u3002<\/p>\n\n\n\n<p>spacy\u306b\u306f\u3001\u4e8b\u524d\u5b66\u7fd2\u6e08\u307f\u306e\u8a00\u8a9e\u30e2\u30c7\u30eb\u304c\u8c4a\u5bcc\u306b\u7528\u610f\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u82f1\u8a9e\u3092\u306f\u3058\u3081\u3001\u65e5\u672c\u8a9e\u3001\u4e2d\u56fd\u8a9e\u3001\u30c9\u30a4\u30c4\u8a9e\u306a\u3069\u3001\u591a\u8a00\u8a9e\u306b\u5bfe\u5fdc\u3002\u30b0\u30ed\u30fc\u30d0\u30eb\u306a\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306b\u3082\u6d3b\u7528\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>API\u306e\u8a2d\u8a08\u3082\u3001\u30b7\u30f3\u30d7\u30eb\u30
67\u76f4\u611f\u7684\u306a\u4f7f\u3044\u3084\u3059\u3055\u3092\u91cd\u8996\u3057\u3066\u3044\u307e\u3059\u3002\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u8907\u96d1\u306a\u51e6\u7406\u3092\u3001\u5c11\u306a\u3044\u30b3\u30fc\u30c9\u91cf\u3067\u5b9f\u73fe\u53ef\u80fd\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u3055\u3089\u306b\u3001spacy\u306f\u30aa\u30fc\u30d7\u30f3\u30bd\u30fc\u30b9\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3068\u3057\u3066GitHub\u4e0a\u3067\u958b\u767a\u3055\u308c\u3066\u304a\u308a\u3001\u4e16\u754c\u4e2d\u306e\u958b\u767a\u8005\u304c\u30b3\u30f3\u30c8\u30ea\u30d3\u30e5\u30fc\u30c8\u3057\u3066\u3044\u307e\u3059\u3002\u6d3b\u767a\u306a\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u306b\u3088\u3063\u3066\u3001\u5e38\u306b\u6700\u65b0\u306e\u6280\u8853\u3092\u53d6\u308a\u8fbc\u307f\u306a\u304c\u3089\u9032\u5316\u3092\u7d9a\u3051\u3066\u3044\u308b\u70b9\u3082\u3001\u5927\u304d\u306a\u5f37\u307f\u3068\u3044\u3048\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-3\">spacy\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3068\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-4\">\u958b\u767a\u74b0\u5883\u306e\u6e96\u5099 \u2013 Python\u3068Pip\u306e\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7<\/h3>\n\n\n\n<p>spacy\u3092\u4f7f\u3046\u306b\u306f\u3001\u307e\u305aPython\u74b0\u5883\u3092\u6574\u3048\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002spacy\u306f\u3001Python 
3.7\u4ee5\u4e0a\u3067\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>Python\u306e\u30c7\u30a3\u30b9\u30c8\u30ea\u30d3\u30e5\u30fc\u30b7\u30e7\u30f3\u3068\u3057\u3066\u3001Anaconda\u3084Miniconda\u304c\u304a\u3059\u3059\u3081\u3067\u3059\u3002\u3053\u308c\u3089\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u5fc5\u8981\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3084\u4eee\u60f3\u74b0\u5883\u306e\u69cb\u7bc9\u304c\u7c21\u5358\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>Python\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u304c\u5b8c\u4e86\u3057\u305f\u3089\u3001spacy\u3092\u52d5\u304b\u3059\u305f\u3081\u306e\u30c4\u30fc\u30eb\u3092\u63c3\u3048\u307e\u3057\u3087\u3046\u3002Jupyter Notebook\u3084JupyterLab\u304c\u3042\u308c\u3070\u3001\u5bfe\u8a71\u7684\u306bspacy\u306e\u30b3\u30fc\u30c9\u3092\u5b9f\u884c\u3057\u306a\u304c\u3089\u52d5\u4f5c\u3092\u78ba\u8a8d\u3067\u304d\u308b\u306e\u3067\u4fbf\u5229\u3067\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-5\">spacy\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb \u2013 Pip\u30b3\u30de\u30f3\u30c9\u306b\u3088\u308b\u5c0e\u5165\u65b9\u6cd5<\/h3>\n\n\n\n<p>\u958b\u767a\u74b0\u5883\u304c\u6574\u3063\u305f\u3089\u3001\u3044\u3088\u3044\u3088spacy\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3067\u3059\u3002spacy\u306f\u3001Python\u306e\u30d1\u30c3\u30b1\u30fc\u30b8\u7ba1\u7406\u30b7\u30b9\u30c6\u30e0\u3067\u3042\u308bpip\u3092\u4f7f\u3063\u3066\u5c0e\u5165\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30bf\u30fc\u30df\u30ca\u30eb\u3084\u30b3\u30de\u30f3\u30c9\u30d7\u30ed\u30f3\u30d7\u30c8\u304b\u3089\u3001\u4ee5\u4e0b\u306e\u30b3\u30de\u30f3\u30c9\u3092\u5b9f\u884c\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">pip 
install spacy<\/pre>\n\n\n\n<p>\u3053\u308c\u3060\u3051\u3067\u3001spacy\u3068\u305d\u306e\u4f9d\u5b58\u30e9\u30a4\u30d6\u30e9\u30ea\u304c\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-6\">Language Model\u306e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9 \u2013 \u5229\u7528\u53ef\u80fd\u306a\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u7a2e\u985e\u3068\u5165\u624b\u6cd5<\/h3>\n\n\n\n<p>spacy\u3067\u306f\u3001\u5404\u8a00\u8a9e\u306e\u7279\u5fb4\u3092\u5b66\u7fd2\u6e08\u307f\u306eLanguage Model\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30e2\u30c7\u30eb\u306f\u3001\u3042\u3089\u304b\u3058\u3081spacy\u304c\u63d0\u4f9b\u3057\u3066\u3044\u308b\u300c\u4e8b\u524d\u5b66\u7fd2\u6e08\u307f\u30e2\u30c7\u30eb\u300d\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u3053\u3068\u3067\u5229\u7528\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4f8b\u3048\u3070\u82f1\u8a9e\u306e\u5834\u5408\u3001\u4ee5\u4e0b\u306e\u30b3\u30de\u30f3\u30c9\u3067\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">python -m spacy download en_core_web_sm<\/pre>\n\n\n\n<p>\u65e5\u672c\u8a9e\u306a\u3089\u3001\u4ee5\u4e0b\u306e\u30b3\u30de\u30f3\u30c9\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">python -m spacy download 
ja_core_news_sm<\/pre>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306b\u3001\u4f7f\u7528\u3057\u305f\u3044\u8a00\u8a9e\u306e\u30e2\u30c7\u30eb\u3092\u6307\u5b9a\u3057\u3066\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u304f\u3060\u3055\u3044\u3002spacy\u306f70\u4ee5\u4e0a\u306e\u8a00\u8a9e\u306b\u5bfe\u5fdc\u3057\u3066\u3044\u308b\u306e\u3067\u3001\u591a\u8a00\u8a9e\u74b0\u5883\u3067\u6d3b\u8e8d\u3067\u304d\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-7\">spacy\u306e\u57fa\u672c\u7684\u306a\u30ef\u30fc\u30af\u30d5\u30ed\u30fc \u2013 \u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u69cb\u6210\u8981\u7d20\u3068\u51e6\u7406\u306e\u6d41\u308c<\/h3>\n\n\n\n<p>spacy\u306b\u3088\u308b\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306f\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30ef\u30fc\u30af\u30d5\u30ed\u30fc\u3067\u9032\u307f\u307e\u3059\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Language Model\u3092\u8aad\u307f\u8fbc\u307f\u3001nlp\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3092\u4f5c\u6210\u3059\u308b\u3002<\/li>\n\n\n\n<li>\u89e3\u6790\u5bfe\u8c61\u306e\u30c6\u30ad\u30b9\u30c8\u3092nlp\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u6e21\u3057\u3001Doc\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3092\u5f97\u308b\u3002<\/li>\n\n\n\n<li>Doc\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u304b\u3089Token\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3092\u53d6\u308a\u51fa\u3057\u3001\u5358\u8a9e\u3084\u54c1\u8a5e\u306e\u60c5\u5831\u306b\u30a2\u30af\u30bb\u30b9\u3059\u308b\u3002<\/li>\n\n\n\n<li>\u5fc5\u8981\u306b\u5fdc\u3058\u3066\u3001\u56fa\u6709\u8868\u73fe\u306e\u62bd\u51fa\u3084\u69cb\u6587\u89e3\u6790\u306a\u3069\u306e\u51e6\u7406\u3092\u884c\u3046\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u4e00\u9023\u306e\u51e6\u7406\u306f\u3001spacy\u306eLanguage 
Pipeline\u306b\u6cbf\u3063\u3066\u5b9f\u884c\u3055\u308c\u307e\u3059\u3002\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306f\u3001Tokenizer\u3001Tagger\u3001Parser\u3001EntityRecognizer\u306a\u3069\u306e\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3067\u69cb\u6210\u3055\u308c\u3066\u304a\u308a\u3001\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u306f\u3053\u308c\u3089\u306e\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3092\u9806\u306b\u901a\u904e\u3057\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u5404\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u306e\u51e6\u7406\u7d50\u679c\u306f\u3001Doc\u3001Token\u3001Span\u306a\u3069\u306e\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u683c\u7d0d\u3055\u308c\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3092\u901a\u3058\u3066\u3001\u62bd\u51fa\u3055\u308c\u305f\u8a00\u8a9e\u7684\u7279\u5fb4\u306b\u30a2\u30af\u30bb\u30b9\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0a\u304c\u3001spacy\u3092\u4f7f\u3063\u305f\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u57fa\u672c\u7684\u306a\u6d41\u308c\u3067\u3059\u3002\u6b21\u7ae0\u304b\u3089\u306f\u3001\u5b9f\u969b\u306b\u30b3\u30fc\u30c9\u3092\u66f8\u304d\u306a\u304c\u3089spacy\u306e\u4f7f\u3044\u65b9\u3092\u898b\u3066\u3044\u304d\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-8\">\u65e5\u672c\u8a9e\u30c6\u30ad\u30b9\u30c8\u306e\u524d\u51e6\u7406\u3068\u8a00\u8a9e\u89e3\u6790\u306e\u57fa\u672c<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-9\">Tokenizer\u306b\u3088\u308b\u5358\u8a9e\u5206\u5272 \u2013 
\u65e5\u672c\u8a9e\u7279\u6709\u306e\u554f\u984c\u3068\u5bfe\u51e6\u6cd5<\/h3>\n\n\n\n<p>\u65e5\u672c\u8a9e\u306e\u30c6\u30ad\u30b9\u30c8\u3092\u89e3\u6790\u3059\u308b\u969b\u3001\u6700\u521d\u306e\u30cf\u30fc\u30c9\u30eb\u3068\u306a\u308b\u306e\u304cTokenization\uff08\u5358\u8a9e\u5206\u5272\uff09\u3067\u3059\u3002\u82f1\u8a9e\u306a\u3069\u306e\u3088\u3046\u306b\u5358\u8a9e\u9593\u306b\u30b9\u30da\u30fc\u30b9\u304c\u3042\u308b\u8a00\u8a9e\u3068\u7570\u306a\u308a\u3001\u65e5\u672c\u8a9e\u306f\u5358\u8a9e\u304c\u9023\u7d9a\u3057\u3066\u66f8\u304b\u308c\u308b\u305f\u3081\u3001\u5358\u8a9e\u306e\u5883\u754c\u3092\u898b\u3064\u3051\u308b\u306e\u304c\u96e3\u3057\u304f\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>spacy\u306e\u65e5\u672c\u8a9e\u30e2\u30c7\u30eb\u3067\u306f\u3001\u3053\u306e\u554f\u984c\u306b\u5bfe\u51e6\u3059\u308b\u305f\u3081\u306bSudachiPy\u3092\u30d0\u30c3\u30af\u30a8\u30f3\u30c9\u3068\u3057\u305fTokenizer\u3092\u63d0\u4f9b\u3057\u3066\u3044\u307e\u3059\u3002\u3053\u306eTokenizer\u306f\u3001\u65e5\u672c\u8a9e\u306e\u6587\u6cd5\u7684\u306a\u7279\u5fb4\u3092\u8003\u616e\u3057\u306a\u304c\u3089\u3001\u9069\u5207\u306a\u5358\u4f4d\u3067\u5358\u8a9e\u3092\u5206\u5272\u3057\u3066\u304f\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u305f\u3060\u3057\u3001\u65e5\u672c\u8a9e\u7279\u6709\u306e\u554f\u984c\u3068\u3057\u3066\u3001\u8907\u5408\u8a9e\u306e\u5206\u5272\u3084\u672a\u77e5\u8a9e\u306e\u51e6\u7406\u306a\u3069\u304c\u6319\u3052\u3089\u308c\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001\u300c\u6771\u4eac\u90fd\u5e81\u300d\u306e\u3088\u3046\u306a\u8907\u5408\u8a9e\u3092\u300c\u6771\u4eac\u300d\u300c\u90fd\u300d\u300c\u5e81\u300d\u306e\u3088\u3046\u306b\u5206\u5272\u3057\u3066\u3057\u307e\u3046\u3068\u3001\u672c\u6765\u306e\u610f\u5473\u304c\u5931\u308f\u308c\u3066\u3057\u307e\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u554f\u984c\u306b\u5bfe\u51e6\u3059\u308b\u306b\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u5b9a\u7fa9\u8f9e\u66f8\u
3092\u7528\u3044\u305f\u308a\u3001\u8907\u5408\u8a9e\u30921\u3064\u306e\u30c8\u30fc\u30af\u30f3\u3068\u3057\u3066\u6271\u3046\u30eb\u30fc\u30eb\u3092\u8a2d\u5b9a\u3057\u305f\u308a\u3059\u308b\u306e\u304c\u6709\u52b9\u3067\u3059\u3002spacy\u3067\u306f\u3001Tokenizer.add_special_case\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u3053\u306e\u3088\u3046\u306a\u30ab\u30b9\u30bf\u30de\u30a4\u30ba\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-10\">\u54c1\u8a5e\u30bf\u30b0\u4ed8\u3051\u3068\u30ec\u30f3\u30de\u5316 \u2013 \u5358\u8a9e\u306e\u6a19\u6e96\u5f62\u30fb\u898b\u51fa\u3057\u8a9e\u3078\u306e\u5909\u63db<\/h3>\n\n\n\n<p>\u5358\u8a9e\u5206\u5272\u304c\u5b8c\u4e86\u3057\u305f\u3089\u3001\u5404\u30c8\u30fc\u30af\u30f3\u306b\u54c1\u8a5e\u60c5\u5831\u3092\u4ed8\u4e0e\u3057\u3066\u3044\u304d\u307e\u3059\u3002\u3053\u308c\u3092POS Tagging\uff08\u54c1\u8a5e\u30bf\u30b0\u4ed8\u3051\uff09\u3068\u547c\u3073\u307e\u3059\u3002<\/p>\n\n\n\n<p>spacy\u306eTagger\u306f\u3001\u65e5\u672c\u8a9e\u3067\u306fUniDic\u54c1\u8a5e\u4f53\u7cfb\u306b\u57fa\u3065\u3044\u3066\u30bf\u30b0\u4ed8\u3051\u3092\u884c\u3044\u307e\u3059\u3002UniDic\u306f\u3001\u8a00\u8a9e\u5b66\u7684\u306b\u7cbe\u7dfb\u306a\u54c1\u8a5e\u60c5\u5831\u3092\u63d0\u4f9b\u3057\u3066\u3044\u308b\u305f\u3081\u3001\u9ad8\u5ea6\u306a\u8a00\u8a9e\u89e3\u6790\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u54c1\u8a5e\u30bf\u30b0\u4ed8\u3051\u3068\u4e26\u884c\u3057\u3066\u3001\u30ec\u30f3\u30de\u5316\uff08Lemmatization\uff09\u3082\u884c\u308f\u308c\u307e\u3059\u3002\u30ec\u30f3\u30de\u5316\u3068\u306f\u3001\u5404\u30c8\u30fc\u30af\u30f3\u3092\u539f\u5f62\uff08\u8f9e\u66f8\u5f62\uff09\u306b\u5909\u63db\u3059\u308b\u51e6\u7406\u3067\u3059\u3002\u4f8b\u3048\u3070\u3001\u52d5\u8a5e\u306e\u300c\u98df\u3079\u308b\u300d\u300c\u98df\u3079\u305f\u300d\u300c\u98df\u3079\u307e\u3059\u300d\u306a\u3069\u306f\u3001\u5168\u3066\u539f\u5f62\u306e\u300c\u98df\u3079\u308b\u300d\u306b\u59
09\u63db\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u51e6\u7406\u306b\u3088\u3063\u3066\u3001\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u306f\u8a00\u8a9e\u7684\u306b\u6574\u7406\u3055\u308c\u305f\u5f62\u306b\u306a\u308a\u307e\u3059\u3002\u4ee5\u4e0b\u306e\u30b3\u30fc\u30c9\u4f8b\u3092\u898b\u3066\u307f\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import spacy\n\nnlp = spacy.load(\"ja_core_news_sm\")\ndoc = nlp(\"\u79c1\u306f\u30ea\u30f3\u30b4\u3092\u98df\u3079\u307e\u3059\u3002\")\n\nfor token in doc:\n    print(token.text, token.pos_, token.lemma_)<\/pre>\n\n\n\n<p>\u51fa\u529b\u7d50\u679c\u306f\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">\u79c1 PRON \u79c1\n\u306f ADP \u306f\n\u30ea\u30f3\u30b4 NOUN \u30ea\u30f3\u30b4\n\u3092 ADP \u3092\n\u98df\u3079 VERB \u98df\u3079\u308b\n\u307e\u3059 AUX \u307e\u3059\n\u3002 PUNCT \u3002<\/pre>\n\n\n\n<p>\u5404\u30c8\u30fc\u30af\u30f3\u306b\u5bfe\u3057\u3066\u3001\u54c1\u8a5e\u30bf\u30b0\uff08token.pos_\uff09\u3068\u30ec\u30f3\u30de\uff08token.lemma_\uff09\u304c\u4ed8\u4e0e\u3055\u308c\u3066\u3044\u308b\u3053\u3068\u304c\u308f\u304b\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-11\">\u69cb\u6587\u89e3\u6790 \u2013 
\u5358\u8a9e\u306e\u4fc2\u308a\u53d7\u3051\u95a2\u4fc2\u306e\u89e3\u6790<\/h3>\n\n\n\n<p>\u54c1\u8a5e\u30bf\u30b0\u4ed8\u3051\u304c\u5b8c\u4e86\u3057\u305f\u3089\u3001\u6b21\u306f\u69cb\u6587\u89e3\u6790\uff08Parsing\uff09\u3092\u884c\u3044\u307e\u3059\u3002\u69cb\u6587\u89e3\u6790\u306f\u3001\u6587\u306e\u69cb\u9020\u3092\u89e3\u304d\u660e\u304b\u3057\u3001\u5358\u8a9e\u9593\u306e\u95a2\u4fc2\u6027\u3092\u898b\u51fa\u3059\u51e6\u7406\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u65e5\u672c\u8a9e\u306e\u69cb\u6587\u89e3\u6790\u3067\u306f\u3001\u4e3b\u306b\u4f9d\u5b58\u69cb\u9020\u89e3\u6790\u304c\u7528\u3044\u3089\u308c\u307e\u3059\u3002\u3053\u308c\u306f\u3001\u5404\u5358\u8a9e\u304c\u3069\u306e\u5358\u8a9e\u306b\u4fc2\u3063\u3066\u3044\u308b\u306e\u304b\u3092\u898b\u3064\u3051\u51fa\u3059\u624b\u6cd5\u3067\u3059\u3002<\/p>\n\n\n\n<p>spacy\u306eParser\u306f\u3001\u30c8\u30fc\u30af\u30f3\u9593\u306e\u4f9d\u5b58\u95a2\u4fc2\u3092\u63a8\u5b9a\u3057\u3001\u305d\u306e\u7d50\u679c\u3092\u6709\u5411\u30b0\u30e9\u30d5\u3068\u3057\u3066\u51fa\u529b\u3057\u307e\u3059\u3002\u4ee5\u4e0b\u306e\u30b3\u30fc\u30c9\u3067\u3001\u69cb\u6587\u89e3\u6790\u306e\u7d50\u679c\u3092\u53ef\u8996\u5316\u3057\u3066\u307f\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import spacy\nfrom spacy import displacy\n\nnlp = spacy.load(\"ja_core_news_sm\")\ndoc = nlp(\"\u79c1\u306f\u30ea\u30f3\u30b4\u3092\u98df\u3079\u307e\u3059\u3002\")\nsvg = displacy.render(doc, style=\"dep\", jupyter=False)\nwith open(\"dependency_plot.svg\", \"w\", encoding=\"utf-8\") as f:\n    f.write(svg)<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u3092\u5b9f\u884c\u3059\u308b\u3068\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u4f9d\u5b58\u69cb\u9020\u306e\u30b0\u30e9\u30d5\u304cSVG\u30d5\u30a1\u30a4\u30eb\u3068\u3057\u3066\u4fdd\u5b58\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img 
loading=\"lazy\" decoding=\"async\" width=\"1974\" height=\"678\" src=\"https:\/\/chocottopro.com\/wp-content\/uploads\/2024\/04\/\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8-2024-04-09-23.50.52.png\" alt=\"\" class=\"wp-image-196\" srcset=\"https:\/\/chocottopro.com\/wp-content\/uploads\/2024\/04\/\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8-2024-04-09-23.50.52.png 1974w, https:\/\/chocottopro.com\/wp-content\/uploads\/2024\/04\/\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8-2024-04-09-23.50.52-300x103.png 300w, https:\/\/chocottopro.com\/wp-content\/uploads\/2024\/04\/\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8-2024-04-09-23.50.52-1024x352.png 1024w, https:\/\/chocottopro.com\/wp-content\/uploads\/2024\/04\/\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8-2024-04-09-23.50.52-768x264.png 768w, https:\/\/chocottopro.com\/wp-content\/uploads\/2024\/04\/\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8-2024-04-09-23.50.52-1536x528.png 1536w, https:\/\/chocottopro.com\/wp-content\/uploads\/2024\/04\/\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8-2024-04-09-23.50.52-940x323.png 940w\" sizes=\"auto, (max-width: 1974px) 100vw, 1974px\"><\/figure>\n\n\n\n<p>\u30b0\u30e9\u30d5\u3092\u898b\u308b\u3068\u3001\u300c\u98df\u3079\u307e\u3059\u300d\u304c\u6587\u306e\u4e2d\u5fc3\u3067\u3042\u308a\u3001\u300c\u79c1\u300d\u3068\u300c\u30ea\u30f3\u30b4\u300d\u304c\u305d\u308c\u306b\u4fc2\u3063\u3066\u3044\u308b\u3053\u3068\u304c\u308f\u304b\u308a\u307e\u3059\u3002\u3053\u306e\u3088\u3046\u306b\u3001\u69cb\u6587\u89e3\u6790\u306b\u3088\u3063\u3066\u5358\u8a9e\u9593\u306e\u95a2\u4fc2\u6027\u304c\u660e\u3089\u304b\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-12\">\u56fa\u6709\u8868\u73fe\u62bd\u51fa \u2013 \u4eba\u540d\u3001\u5730\u540d\u3001\u7d44\u7e54\u540d\u7b49\u306e\u62bd\u51fa\u65b9\u6cd5<\/h3>\n\n\n\n<p>\u6700\u5f8c\u306b\u3001\u56fa\u6709\u8868\u73fe\u62bd\u51fa\uff08Named Entity 
Recognition\uff09\u306b\u3064\u3044\u3066\u898b\u3066\u3044\u304d\u307e\u3057\u3087\u3046\u3002\u56fa\u6709\u8868\u73fe\u62bd\u51fa\u306f\u3001\u30c6\u30ad\u30b9\u30c8\u4e2d\u304b\u3089\u4eba\u540d\u3001\u5730\u540d\u3001\u7d44\u7e54\u540d\u306a\u3069\u306e\u56fa\u6709\u540d\u8a5e\u3092\u898b\u3064\u3051\u51fa\u3059\u51e6\u7406\u3067\u3059\u3002<\/p>\n\n\n\n<p>spacy\u306eEntityRecognizer\u306f\u3001\u5404\u8a00\u8a9e\u306e\u30e2\u30c7\u30eb\u306b\u542b\u307e\u308c\u308b\u56fa\u6709\u8868\u73fe\u30c7\u30fc\u30bf\u3092\u5229\u7528\u3057\u3066\u3001\u30c6\u30ad\u30b9\u30c8\u304b\u3089\u56fa\u6709\u8868\u73fe\u3092\u62bd\u51fa\u3057\u307e\u3059\u3002\u4ee5\u4e0b\u306e\u30b3\u30fc\u30c9\u306f\u3001\u65e5\u672c\u8a9e\u30c6\u30ad\u30b9\u30c8\u304b\u3089\u56fa\u6709\u8868\u73fe\u3092\u898b\u3064\u3051\u51fa\u3059\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import spacy\n\nnlp = spacy.load(\"ja_core_news_sm\")\ndoc = nlp(\"\u30a2\u30c3\u30d7\u30eb\u793e\u306e\u30c6\u30a3\u30e0\u30fb\u30af\u30c3\u30afCEO\u306f\u3001iPhone\u306e\u65b0\u30e2\u30c7\u30eb\u3092\u767a\u8868\u3057\u307e\u3057\u305f\u3002\")\n\nfor ent in doc.ents:\n    print(ent.text, ent.label_)<\/pre>\n\n\n\n<p>\u51fa\u529b\u7d50\u679c\u306f\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">\u30a2\u30c3\u30d7\u30eb\u793e ORG\n\u30c6\u30a3\u30e0\u30fb\u30af\u30c3\u30af PERSON\niPhone 
PRODUCT<\/pre>\n\n\n\n<p>\u300c\u30a2\u30c3\u30d7\u30eb\u793e\u300d\u306f\u7d44\u7e54\u540d\u3001\u300c\u30c6\u30a3\u30e0\u30fb\u30af\u30c3\u30af\u300d\u306f\u4eba\u540d\u3001\u300ciPhone\u300d\u306f\u88fd\u54c1\u540d\u3068\u3057\u3066\u8a8d\u8b58\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u3053\u306e\u3088\u3046\u306b\u3001spacy\u3092\u4f7f\u3048\u3070\u7c21\u5358\u306b\u56fa\u6709\u8868\u73fe\u3092\u62bd\u51fa\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0a\u3001\u65e5\u672c\u8a9e\u30c6\u30ad\u30b9\u30c8\u306e\u524d\u51e6\u7406\u3068\u57fa\u672c\u7684\u306a\u8a00\u8a9e\u89e3\u6790\u306e\u624b\u9806\u3092\u898b\u3066\u304d\u307e\u3057\u305f\u3002\u6b21\u7ae0\u3067\u306f\u3001\u3053\u308c\u3089\u306e\u6280\u8853\u3092\u5fdc\u7528\u3057\u305f\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u3092\u7d39\u4ecb\u3057\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-13\">spacy\u306b\u3088\u308b\u5b9f\u8df5\u7684\u306a\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-14\">\u30c6\u30ad\u30b9\u30c8\u5206\u985e \u2013 
\u30cb\u30e5\u30fc\u30b9\u8a18\u4e8b\u3084\u30ec\u30d3\u30e5\u30fc\u306e\u81ea\u52d5\u30ab\u30c6\u30b4\u30ea\u5206\u985e<\/h3>\n\n\n\n<p>\u30c6\u30ad\u30b9\u30c8\u5206\u985e\u306f\u3001\u4e0e\u3048\u3089\u308c\u305f\u30c6\u30ad\u30b9\u30c8\u3092\u4e88\u3081\u5b9a\u7fa9\u3055\u308c\u305f\u30ab\u30c6\u30b4\u30ea\u306b\u632f\u308a\u5206\u3051\u308b\u6280\u8853\u3067\u3059\u3002\u30cb\u30e5\u30fc\u30b9\u8a18\u4e8b\u306e\u30b8\u30e3\u30f3\u30eb\u5206\u985e\u3084\u3001\u30ec\u30d3\u30e5\u30fc\u306e\u611f\u60c5\u6975\u6027\u5224\u5b9a\u306a\u3069\u3001\u5e45\u5e83\u3044\u5834\u9762\u3067\u6d3b\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>spacy\u306eTextCategorizer\u6a5f\u80fd\u3092\u4f7f\u3046\u3068\u3001\u3053\u306e\u3088\u3046\u306a\u30c6\u30ad\u30b9\u30c8\u5206\u985e\u3092\u7c21\u5358\u306b\u5b9f\u88c5\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306f\u3001\u30cb\u30e5\u30fc\u30b9\u8a18\u4e8b\u3092\u30ab\u30c6\u30b4\u30ea\u5206\u985e\u3059\u308b\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import spacy\nfrom spacy.tokens import DocBin\n\n# \u5b66\u7fd2\u30c7\u30fc\u30bf\u306e\u6e96\u5099\ntrain_data = [\n    (\"\u30aa\u30ea\u30f3\u30d4\u30c3\u30af\u306e\u958b\u4f1a\u5f0f\u304c\u884c\u308f\u308c\u305f\u3002\", {\"cats\": {\"\u30b9\u30dd\u30fc\u30c4\": 1.0, \"\u653f\u6cbb\": 0.0, \"\u7d4c\u6e08\": 0.0}}),\n    (\"\u682a\u4fa1\u304c\u5927\u5e45\u306b\u4e0b\u843d\u3057\u305f\u3002\", {\"cats\": {\"\u30b9\u30dd\u30fc\u30c4\": 0.0, \"\u653f\u6cbb\": 0.0, \"\u7d4c\u6e08\": 1.0}}),\n    (\"\u9996\u76f8\u304c\u8a18\u8005\u4f1a\u898b\u3092\u958b\u3044\u305f\u3002\", {\"cats\": {\"\u30b9\u30dd\u30fc\u30c4\": 0.0, \"\u653f\u6cbb\": 1.0, \"\u7d4c\u6e08\": 0.0}}),\n    # ...\n]\n\n# \u30e2\u30c7\u30eb\u306e\u5b66\u7fd2\nnlp = 
spacy.blank(\"ja\")\ntextcat = nlp.create_pipe(\"textcat\")\nnlp.add_pipe(textcat)\ntextcat.add_label(\"\u30b9\u30dd\u30fc\u30c4\")\ntextcat.add_label(\"\u653f\u6cbb\")\ntextcat.add_label(\"\u7d4c\u6e08\")\n\ndb = DocBin()\nfor text, annotations in train_data:\n    doc = nlp(text)\n    doc.cats = annotations[\"cats\"]\n    db.add(doc)\ndb.to_disk(\".\/train.spacy\")\n\nnlp.initialize()\nnlp.train(db)\n\n# \u30e2\u30c7\u30eb\u306e\u9069\u7528\ntext = \"\u5927\u7d71\u9818\u9078\u6319\u306e\u6295\u7968\u304c\u59cb\u307e\u3063\u305f\u3002\"\ndoc = nlp(text)\nprint(doc.cats)<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001\u307e\u305a\u5b66\u7fd2\u30c7\u30fc\u30bf\u3092\u6e96\u5099\u3057\u307e\u3059\u3002\u5404\u30c6\u30ad\u30b9\u30c8\u306b\u5bfe\u3057\u3066\u3001\u5bfe\u5fdc\u3059\u308b\u30ab\u30c6\u30b4\u30ea\u3092\u8f9e\u66f8\u5f62\u5f0f\u3067\u6307\u5b9a\u3057\u307e\u3059\u3002\u6b21\u306b\u3001spacy\u306e\u30e2\u30c7\u30eb\u3092\u4f5c\u6210\u3057\u3001TextCategorizer\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3092\u8ffd\u52a0\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u5b66\u7fd2\u30c7\u30fc\u30bf\u3092DocBin\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u5909\u63db\u3057\u3001to_disk()\u30e1\u30bd\u30c3\u30c9\u3067\u4fdd\u5b58\u3057\u307e\u3059\u3002\u305d\u3057\u3066\u3001nlp.train()\u3067\u30e2\u30c7\u30eb\u3092\u5b66\u7fd2\u3055\u305b\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u5b66\u7fd2\u6e08\u307f\u30e2\u30c7\u30eb\u3092\u65b0\u3057\u3044\u30c6\u30ad\u30b9\u30c8\u306b\u9069\u7528\u3059\u308b\u3068\u3001\u305d\u306e\u30c6\u30ad\u30b9\u30c8\u304c\u3069\u306e\u30ab\u30c6\u30b4\u30ea\u306b\u5c5e\u3059\u308b\u304b\u304c\u4e88\u6e2c\u3055\u308c\u307e\u3059\u3002doc.cats\u30d7\u30ed\u30d1\u30c6\u30a3\u306b\u3001\u5404\u30ab\u30c6\u30b4\u30ea\u306e\u78ba\u4fe1\u5ea6\u304c\u683c\u7d0d\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306b\u3001spacy\u3092\u4f7f\u3048\u3070\u3001\u5927\u91cf\u306e\u30c6\u30ad\u30b9\u30c8\u3092\u81ea\u52d5\u7684\u3
06b\u5206\u985e\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u30cb\u30e5\u30fc\u30b9\u8a18\u4e8b\u306e\u81ea\u52d5\u30bf\u30b0\u4ed8\u3051\u3084\u3001\u30ec\u30d3\u30e5\u30fc\u306e\u8a55\u4fa1\u5206\u6790\u306a\u3069\u3001\u69d8\u3005\u306a\u5fdc\u7528\u304c\u8003\u3048\u3089\u308c\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-15\">\u611f\u60c5\u5206\u6790 \u2013 SNS\u306e\u6295\u7a3f\u306a\u3069\u304b\u3089\u30e6\u30fc\u30b6\u30fc\u306e\u611f\u60c5\u3092\u5224\u5b9a\u3059\u308b<\/h3>\n\n\n\n<p>\u611f\u60c5\u5206\u6790\u306f\u3001\u30c6\u30ad\u30b9\u30c8\u4e2d\u306b\u542b\u307e\u308c\u308b\u611f\u60c5\u3092\u6570\u5024\u5316\u3059\u308b\u6280\u8853\u3067\u3059\u3002SNS\u306e\u6295\u7a3f\u3084\u30e6\u30fc\u30b6\u30fc\u30ec\u30d3\u30e5\u30fc\u306a\u3069\u304b\u3089\u3001\u66f8\u304d\u624b\u306e\u611f\u60c5\u3092\u63a8\u5b9a\u3059\u308b\u306e\u306b\u5f79\u7acb\u3061\u307e\u3059\u3002<\/p>\n\n\n\n<p>spacy\u3068scikit-learn\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u611f\u60c5\u5206\u6790\u5668\u3092\u69cb\u7bc9\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306f\u3001\u30ec\u30d3\u30e5\u30fc\u30c6\u30ad\u30b9\u30c8\u304b\u3089\u611f\u60c5\u6975\u6027\u3092\u5224\u5b9a\u3059\u308b\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import spacy\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.feature_extraction.text import TfidfVectorizer\n\n# \u5b66\u7fd2\u30c7\u30fc\u30bf\u306e\u6e96\u5099\ntrain_data = [\n    (\"\u3053\u306e\u88fd\u54c1\u306f\u6700\u9ad8\u3067\u3059\uff01\", 1),\n    (\"\u671f\u5f85\u306f\u305a\u308c\u3067\u3057\u305f\u3002\u4e8c\u5ea6\u3068\u8cb7\u3044\u307e\u305b\u3093\u3002\", 0),\n    
(\"\u5024\u6bb5\u306e\u5272\u306b\u826f\u3044\u5546\u54c1\u3060\u3068\u601d\u3044\u307e\u3059\u3002\", 1),\n    # ...\n]\n\n# \u7279\u5fb4\u91cf\u306e\u62bd\u51fa\nnlp = spacy.load(\"ja_core_news_sm\")\nvectorizer = TfidfVectorizer()\n\ntexts, labels = zip(*train_data)\nX = vectorizer.fit_transform([nlp(text).text for text in texts])\n\n# \u30e2\u30c7\u30eb\u306e\u5b66\u7fd2\nmodel = LogisticRegression()\nmodel.fit(X, labels)\n\n# \u30e2\u30c7\u30eb\u306e\u9069\u7528\ntext = \"\u826f\u3044\u70b9\u3082\u3042\u308b\u3051\u3069\u3001\u3082\u3046\u5c11\u3057\u6539\u5584\u306e\u4f59\u5730\u304c\u3042\u308a\u305d\u3046\u3002\"\ndoc = nlp(text)\nvec = vectorizer.transform([doc.text])\nprint(model.predict(vec)[0])<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001\u5b66\u7fd2\u30c7\u30fc\u30bf\u3068\u3057\u3066\u3001\u30c6\u30ad\u30b9\u30c8\u3068\u5bfe\u5fdc\u3059\u308b\u611f\u60c5\u30e9\u30d9\u30eb\uff081:\u80af\u5b9a\u7684\u30010:\u5426\u5b9a\u7684\uff09\u306e\u30da\u30a2\u3092\u7528\u610f\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u6b21\u306b\u3001TF-IDF\u30d9\u30af\u30c8\u30eb\u5316\u3092\u7528\u3044\u3066\u3001\u5404\u30c6\u30ad\u30b9\u30c8\u3092\u6570\u5024\u30d9\u30af\u30c8\u30eb\u306b\u5909\u63db\u3057\u307e\u3059\u3002\u3053\u306e\u3068\u304d\u3001spacy\u306e\u30e2\u30c7\u30eb\u3092\u4f7f\u3063\u3066\u30c6\u30ad\u30b9\u30c8\u3092\u524d\u51e6\u7406\u3057\u3066\u304a\u304f\u3068\u3001\u3088\u308a\u826f\u3044\u7d50\u679c\u304c\u5f97\u3089\u308c\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>scikit-learn\u306eLogisticRegression\u30af\u30e9\u30b9\u3092\u4f7f\u3063\u3066\u3001\u30ed\u30b8\u30b9\u30c6\u30a3\u30c3\u30af\u56de\u5e30\u30e2\u30c7\u30eb\u3092\u5b66\u7fd2\u3055\u305b\u307e\u3059\u3002\u5b66\u7fd2\u6e08\u307f\u30e2\u30c7\u30eb\u306b\u65b0\u3057\u3044\u30c6\u30ad\u30b9\u30c8\u3092\u4e0e\u3048\u308b\u3068\u3001\u305d\u306e\u30c6\u30ad\u30b9\u30c8\u304c\u80af\u5b9a\u7684\u304b\u5426\u5b9a\u7684\u304b\u3092\u5224\u5b9a\u3057\u3066\u304f\u308c\u307e\u
3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306a\u611f\u60c5\u5206\u6790\u5668\u3092\u4f7f\u3048\u3070\u3001SNS\u30e6\u30fc\u30b6\u30fc\u306e\u53cd\u5fdc\u3092\u81ea\u52d5\u7684\u306b\u96c6\u8a08\u3057\u305f\u308a\u3001\u5546\u54c1\u30ec\u30d3\u30e5\u30fc\u304b\u3089\u9867\u5ba2\u306e\u6e80\u8db3\u5ea6\u3092\u6e2c\u5b9a\u3057\u305f\u308a\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u30de\u30fc\u30b1\u30c6\u30a3\u30f3\u30b0\u3084\u9867\u5ba2\u30b5\u30dd\u30fc\u30c8\u306e\u5206\u91ce\u3067\u3001\u5927\u3044\u306b\u6d3b\u7528\u3067\u304d\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-16\">\u30ad\u30fc\u30ef\u30fc\u30c9\u62bd\u51fa \u2013 \u6587\u66f8\u306e\u4e3b\u984c\u3092\u8868\u3059\u91cd\u8981\u8a9e\u53e5\u306e\u540c\u5b9a<\/h3>\n\n\n\n<p>\u30ad\u30fc\u30ef\u30fc\u30c9\u62bd\u51fa\u306f\u3001\u30c6\u30ad\u30b9\u30c8\u4e2d\u306e\u4e3b\u8981\u306a\u30c8\u30d4\u30c3\u30af\u3092\u8868\u3059\u8a9e\u53e5\u3092\u81ea\u52d5\u7684\u306b\u898b\u3064\u3051\u51fa\u3059\u6280\u8853\u3067\u3059\u3002\u6587\u66f8\u306e\u8981\u7d04\u3084\u3001\u691c\u7d22\u30a8\u30f3\u30b8\u30f3\u306e\u7d22\u5f15\u4ed8\u3051\u306a\u3069\u306b\u5229\u7528\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>spacy\u306eDoc.similarity()\u30e1\u30bd\u30c3\u30c9\u3092\u5fdc\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u7c21\u6613\u7684\u306a\u30ad\u30fc\u30ef\u30fc\u30c9\u62bd\u51fa\u304c\u53ef\u80fd\u3067\u3059\u3002\u4ee5\u4e0b\u306f\u3001\u305d\u306e\u5b9f\u88c5\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import spacy\nfrom collections import Counter\n\nnlp = spacy.load(\"ja_core_news_sm\")\ndoc = 
nlp(\"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306f\u3001\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3092\u4f7f\u3063\u3066\u4eba\u9593\u306e\u8a00\u8a9e\u3092\u5206\u6790\u3057\u3001\u7406\u89e3\u3059\u308b\u305f\u3081\u306e\u6280\u8853\u3067\u3059\u3002\u6a5f\u68b0\u7ffb\u8a33\u3084\u611f\u60c5\u5206\u6790\u306a\u3069\u3001\u5e45\u5e83\u3044\u5fdc\u7528\u5206\u91ce\u304c\u3042\u308a\u307e\u3059\u3002\")\n\n# \u540d\u8a5e\u53e5\u306e\u307f\u3092\u62bd\u51fa\nnoun_chunks = [chunk.text for chunk in doc.noun_chunks]\n\n# TF-IDF\u306b\u3088\u308b\u30b9\u30b3\u30a2\u4ed8\u3051\nword_freq = Counter(noun_chunks)\nword_scores = {}\nfor word in word_freq.keys():\n    word_doc = nlp(word)\n    score = 0\n    for chunk in noun_chunks:\n        chunk_doc = nlp(chunk)\n        score += word_doc.similarity(chunk_doc)\n    word_scores[word] = score \/ len(noun_chunks)\n\n# \u30b9\u30b3\u30a2\u306e\u9ad8\u3044\u4e0a\u4f4d5\u4ef6\u3092\u30ad\u30fc\u30ef\u30fc\u30c9\u3068\u3057\u3066\u62bd\u51fa\nkeywords = sorted(word_scores.items(), key=lambda x: x[1], 
reverse=True)[:5]\nprint(keywords)<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001\u307e\u305aspacy\u306eDoc.noun_chunks\u30d7\u30ed\u30d1\u30c6\u30a3\u3092\u4f7f\u3063\u3066\u3001\u6587\u66f8\u4e2d\u306e\u540d\u8a5e\u53e5\u3092\u62bd\u51fa\u3057\u307e\u3059\u3002\u3053\u308c\u306b\u3088\u308a\u3001\u91cd\u8981\u305d\u3046\u306a\u8a9e\u53e5\u306e\u307f\u3092\u9078\u5225\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u6b21\u306b\u3001\u62bd\u51fa\u3057\u305f\u540d\u8a5e\u53e5\u306b\u3064\u3044\u3066\u3001TF-IDF\u306b\u4f3c\u305f\u6307\u6a19\u3067\u30b9\u30b3\u30a2\u3092\u8a08\u7b97\u3057\u307e\u3059\u3002\u5404\u540d\u8a5e\u53e5\u3068\u3001\u6587\u66f8\u4e2d\u306e\u3059\u3079\u3066\u306e\u540d\u8a5e\u53e5\u3068\u306e\u985e\u4f3c\u5ea6\u3092\u8a08\u7b97\u3057\u3001\u305d\u306e\u5e73\u5747\u5024\u3092\u30b9\u30b3\u30a2\u3068\u3057\u307e\u3059\u3002\u985e\u4f3c\u5ea6\u306e\u8a08\u7b97\u306b\u306fDoc.similarity()\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u6700\u5f8c\u306b\u3001\u30b9\u30b3\u30a2\u306e\u9ad8\u3044\u4e0a\u4f4d5\u4ef6\u306e\u540d\u8a5e\u53e5\u3092\u3001\u30ad\u30fc\u30ef\u30fc\u30c9\u3068\u3057\u3066\u51fa\u529b\u3057\u307e\u3059\u3002\u3053\u306e\u3088\u3046\u306b\u3057\u3066\u3001\u6587\u66f8\u306e\u4e3b\u984c\u3092\u8868\u3059\u8a9e\u53e5\u3092\u81ea\u52d5\u7684\u306b\u898b\u3064\u3051\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30ad\u30fc\u30ef\u30fc\u30c9\u62bd\u51fa\u306f\u3001\u5927\u91cf\u306e\u6587\u66f8\u3092\u8981\u7d04\u3057\u305f\u308a\u3001\u95a2\u9023\u3059\u308b\u30c8\u30d4\u30c3\u30af\u3092\u63a2\u3057\u305f\u308a\u3059\u308b\u969b\u306b\u5a01\u529b\u3092\u767a\u63ee\u3057\u307e\u3059\u3002spacy\u3092\u4f7f\u3048\u3070\u3001\u30b7\u30f3\u30d7\u30eb\u306a\u65b9\u6cd5\u3067\u30ad\u30fc\u30ef\u30fc\u30c9\u3092\u62bd\u51fa\u3067\u304d\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-17\">\u985e\u4f3c\u5ea6\u5224\u5b9a \u2013 
\u6587\u7ae0\u540c\u58eb\u306e\u610f\u5473\u7684\u306a\u8fd1\u3055\u3092\u6e2c\u308b<\/h3>\n\n\n\n<p>\u985e\u4f3c\u5ea6\u5224\u5b9a\u306f\u30012\u3064\u306e\u6587\u7ae0\u304c\u3069\u308c\u3060\u3051\u4f3c\u3066\u3044\u308b\u304b\u3092\u6570\u5024\u5316\u3059\u308b\u6280\u8853\u3067\u3059\u3002\u6587\u66f8\u691c\u7d22\u3084\u3001\u91cd\u8907\u30b3\u30f3\u30c6\u30f3\u30c4\u306e\u691c\u77e5\u306a\u3069\u306b\u5fdc\u7528\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>spacy\u306eDoc.similarity()\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3046\u3068\u3001\u6587\u7ae0\u9593\u306e\u985e\u4f3c\u5ea6\u3092\u7c21\u5358\u306b\u8a08\u7b97\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306f\u3001\u305d\u306e\u5b9f\u88c5\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import spacy\n\nnlp = spacy.load(\"ja_core_news_sm\")\n\ndoc1 = nlp(\"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306f\u3001\u4eba\u5de5\u77e5\u80fd\u306e\u4e00\u5206\u91ce\u3067\u3059\u3002\")\ndoc2 = nlp(\"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306f\u3001\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3092\u4f7f\u3063\u3066\u4eba\u9593\u306e\u8a00\u8a9e\u3092\u5206\u6790\u3059\u308b\u6280\u8853\u3067\u3059\u3002\")\ndoc3 = 
nlp(\"\u6a5f\u68b0\u5b66\u7fd2\u306f\u3001\u30c7\u30fc\u30bf\u304b\u3089\u30d1\u30bf\u30fc\u30f3\u3092\u5b66\u7fd2\u3059\u308b\u3053\u3068\u3067\u3001\u672a\u77e5\u306e\u30c7\u30fc\u30bf\u3092\u4e88\u6e2c\u3059\u308b\u624b\u6cd5\u3067\u3059\u3002\")\n\nprint(doc1.similarity(doc2))\nprint(doc1.similarity(doc3))<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u3067\u306f\u30013\u3064\u306e\u6587\u7ae0\u3092spacy\u306enlp\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u6e21\u3057\u3066\u3001Doc\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u5909\u63db\u3057\u307e\u3059\u3002\u305d\u3057\u3066\u3001Doc.similarity()\u30e1\u30bd\u30c3\u30c9\u3092\u4f7f\u3063\u3066\u3001\u6587\u7ae0\u9593\u306e\u985e\u4f3c\u5ea6\u3092\u8a08\u7b97\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u985e\u4f3c\u5ea6\u306f-1\u304b\u30891\u307e\u3067\u306e\u5024\u3092\u53d6\u308a\uff08\u591a\u304f\u306e\u5834\u5408\u306f0\u304b\u30891\u306e\u7bc4\u56f2\u306b\u53ce\u307e\u308a\u307e\u3059\uff09\u30011\u306b\u8fd1\u3044\u307b\u3069\u985e\u4f3c\u5ea6\u304c\u9ad8\u3044\u3053\u3068\u3092\u8868\u3057\u307e\u3059\u3002\u4e0a\u8a18\u306e\u4f8b\u3067\u306f\u3001doc1\u3068doc2\u306e\u985e\u4f3c\u5ea6\u306f\u6bd4\u8f03\u7684\u9ad8\u304f\u3001doc1\u3068doc3\u306e\u985e\u4f3c\u5ea6\u306f\u4f4e\u304f\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u985e\u4f3c\u5ea6\u5224\u5b9a\u306f\u3001spacy\u306e\u8a00\u8a9e\u30e2\u30c7\u30eb\u304c\u6301\u3064\u5358\u8a9e\u30d9\u30af\u30c8\u30eb\u3092\u5229\u7528\u3057\u3066\u3044\u307e\u3059\u3002\u5358\u8a9e\u30d9\u30af\u30c8\u30eb\u306f\u3001\u5404\u5358\u8a9e\u306e\u610f\u5473\u3092\u591a\u6b21\u5143\u7a7a\u9593\u4e0a\u306e\u70b9\u3068\u3057\u3066\u8868\u73fe\u3057\u305f\u3082\u306e\u3067\u3059\u3002\u6587\u7ae0\u306e\u985e\u4f3c\u5ea6\u306f\u3001\u305d\u306e\u6587\u7ae0\u306b\u542b\u307e\u308c\u308b\u5358\u8a9e\u30d9\u30af\u30c8\u30eb\u306e\u5e73\u5747\u5024\u540c\u58eb\u306e\u30b3\u30b5\u30a4\u30f3\u985e\u4f3c\u5ea6\u3068\u3057\u3066\u8a08\u7b97\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u985e\u4f3c\u5ea6\u5224\u5b9a\u3092\u5fdc\u7528\u3059\u308c\u3070\u3001\u4e0e\u3048\u308
9\u308c\u305f\u6587\u7ae0\u3068\u4f3c\u305f\u5185\u5bb9\u306e\u6587\u7ae0\u3092\u5927\u91cf\u306e\u30c7\u30fc\u30bf\u306e\u4e2d\u304b\u3089\u63a2\u3057\u51fa\u3057\u305f\u308a\u3001\u91cd\u8907\u3059\u308b\u8a18\u4e8b\u3092\u81ea\u52d5\u7684\u306b\u524a\u9664\u3057\u305f\u308a\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0a\u3001spacy\u306b\u3088\u308b\u5b9f\u8df5\u7684\u306a\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u30924\u3064\u7d39\u4ecb\u3057\u307e\u3057\u305f\u3002\u30c6\u30ad\u30b9\u30c8\u5206\u985e\u3001\u611f\u60c5\u5206\u6790\u3001\u30ad\u30fc\u30ef\u30fc\u30c9\u62bd\u51fa\u3001\u985e\u4f3c\u5ea6\u5224\u5b9a\u306a\u3069\u3001\u305d\u308c\u305e\u308c\u5f37\u529b\u306a\u6a5f\u80fd\u3092\u5099\u3048\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u6280\u8853\u3092\u4f7f\u3044\u3053\u306a\u3059\u3053\u3068\u3067\u3001\u5927\u91cf\u306e\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u304b\u3089\u4fa1\u5024\u3042\u308b\u60c5\u5831\u3092\u52b9\u7387\u7684\u306b\u5f15\u304d\u51fa\u305b\u308b\u306f\u305a\u3067\u3059\u3002\u6b21\u7ae0\u3067\u306f\u3001\u3055\u3089\u306b\u9ad8\u5ea6\u306a\u30c8\u30d4\u30c3\u30af\u3068\u3057\u3066\u3001\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u3068spacy\u3092\u7d44\u307f\u5408\u308f\u305b\u305f\u6d3b\u7528\u6cd5\u3092\u63a2\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-18\">\u767a\u5c55\u7684\u306a\u8a71\u984c \u2013 \u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u3068spacy\u306e\u9023\u643a<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-19\">BERT\u3001GPT\u7b49\u306e\u4e8b\u524d\u5b66\u7fd2\u6e08\u307f\u30e2\u30c7\u30eb\u306e\u6d3b\u7528\u6cd5<\/h3>\n\n\n\n<p>\u8fd1\u5e74\u3001\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u5206\u91ce\u3067\u306f\u3001BERT\u3001GPT\u306a\u3069\u306e\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u304c\u5927\u304d\u306a\u6ce8\u76ee\u3092\u96c6\u3081\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30e2\u30c7\u30eb\u306f\u3001\u5927\u91cf\u306e\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u3092\u7528\u3044\u3066\u4e8b\u524d\u5b66\u7fd2\u3055\u308c\u305f\u6df1\u5c64\u5b66\u7fd2\u30e2\u30c7\u30eb\u3067\u3042\u308a\u3001\u69d8\u3005\u306a\u30bf\u30b9\u30af\u306b\u8ee2\u7528\u3067\u304d\u308b\u6c4e\u7528\u7684\u306a\u8a00\u8a9e\u7406\u89e3\u80fd\u529b\u3092\u6301\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u7279\u5fb4\u306f\u3001\u6587\u8108\u3092\u8003\u616e\u3057\u305f\u9ad8\u5ea6\u306a\u8a00\u8a9e\u7406\u89e3\u304c\u53ef\u80fd\u306a\u70b9\u306b\u3042\u308a\u307e\u3059\u3002\u4f8b\u3048\u3070BERT\u306f\u3001\u5358\u8a9e\u306e\u5468\u8fba\u60c5\u5831\u304b\u3089\u5358\u8a9e\u306e\u610f\u5473\u3092\u63a8\u5b9a\u3059\u308b\u300cMasked Language Model\u300d\u3068\u3001\u6587\u7ae0\u306e\u3064\u306a\u304c\u308a\u3092\u4e88\u6e2c\u3059\u308b\u300cNext Sentence 
Prediction\u300d\u3068\u3044\u30462\u3064\u306e\u30bf\u30b9\u30af\u3067\u5b66\u7fd2\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u306b\u3088\u308a\u3001\u5358\u8a9e\u306e\u610f\u5473\u3060\u3051\u3067\u306a\u304f\u3001\u6587\u7ae0\u5168\u4f53\u306e\u6587\u8108\u3082\u8003\u616e\u3057\u305f\u8a00\u8a9e\u7406\u89e3\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001GPT\u306f\u5927\u91cf\u306e\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u3092\u7528\u3044\u3066\u8a00\u8a9e\u30e2\u30c7\u30eb\u3092\u5b66\u7fd2\u3059\u308b\u3053\u3068\u3067\u3001\u4eba\u9593\u306e\u3088\u3046\u306a\u81ea\u7136\u306a\u6587\u7ae0\u3092\u751f\u6210\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3057\u305f\u3002\u3053\u306e\u751f\u6210\u80fd\u529b\u306f\u3001\u8cea\u554f\u5fdc\u7b54\u3084\u8981\u7d04\u3001\u7ffb\u8a33\u306a\u3069\u3001\u69d8\u3005\u306a\u30bf\u30b9\u30af\u306b\u5fdc\u7528\u53ef\u80fd\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u3092\u6d3b\u7528\u3059\u308b\u3053\u3068\u3067\u3001spacy\u306b\u3088\u308b\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u7cbe\u5ea6\u3092\u5927\u5e45\u306b\u5411\u4e0a\u3055\u305b\u3089\u308c\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001\u56fa\u6709\u8868\u73fe\u62bd\u51fa\u306b\u304a\u3051\u308b\u66d6\u6627\u6027\u306e\u89e3\u6d88\u3084\u3001\u611f\u60c5\u5206\u6790\u306b\u304a\u3051\u308b\u6587\u8108\u306e\u8003\u616e\u306a\u3069\u3001\u69d8\u3005\u306a\u5834\u9762\u3067\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u529b\u3092\u501f\u308a\u308b\u3053\u3068\u304c\u3067\u304d\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001BERT\u3092\u4f7f\u3063\u305f\u611f\u60c5\u5206\u6790\u306e\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" 
data-enlighter-title=\"\" data-enlighter-group=\"\">import spacy\nfrom transformers import pipeline\n\nnlp = spacy.load(\"ja_core_news_sm\")\nanalyzer = pipeline(\"sentiment-analysis\", model=\"\u4f7f\u3044\u305f\u3044\u30e2\u30c7\u30eb\u540d\", tokenizer=\"daigo\/bert-base-japanese-sentiment\")\n\ndoc = nlp(\"\u3053\u306e\u6620\u753b\u306f\u6700\u9ad8\u3067\u3057\u305f\uff01\u611f\u52d5\u3057\u3066\u6d99\u304c\u6b62\u307e\u308a\u307e\u305b\u3093\u3067\u3057\u305f\u3002\")\n\nfor sent in doc.sents:\n    result = analyzer(sent.text)[0]\n    print(sent.text, result[\"label\"], result[\"score\"])<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001spacy\u3092\u4f7f\u3063\u3066\u6587\u7ae0\u3092\u6587\u5358\u4f4d\u306b\u5206\u5272\u3057\u3001\u5404\u6587\u3092BERT\u30d9\u30fc\u30b9\u306e\u611f\u60c5\u5206\u6790\u5668\u306b\u6e21\u3057\u3066\u3044\u307e\u3059\u3002\u5206\u6790\u7d50\u679c\u306f\u3001\u30e9\u30d9\u30eb\uff08\u201c\u30dd\u30b8\u30c6\u30a3\u30d6\u201d \u307e\u305f\u306f \u201c\u30cd\u30ac\u30c6\u30a3\u30d6\u201d\uff09\u3068\u30b9\u30b3\u30a2\uff08\u78ba\u4fe1\u5ea6\uff09\u3067\u8fd4\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u308c\u306f\u3001spacy\u3068Transformers\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u7d44\u307f\u5408\u308f\u305b\u305f\u4e00\u4f8b\u3067\u3059\u304c\u3001\u3053\u306e\u3088\u3046\u306a\u5f62\u3067\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u6a5f\u80fd\u3092spacy\u306b\u53d6\u308a\u8fbc\u3080\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"i-20\">Transformers\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u306e\u9023\u643a\u306b\u3088\u308b\u30e2\u30c7\u30eb\u306e\u9ad8\u5ea6\u5316<\/h3>\n\n\n\n<p>\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u3092\u6271\u3046\u4e0a\u3067\u6b20\u304b\u305b\u306a\u3044\u306e\u304c\u3001Transformers\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3059\u3002Transformers\u306fBERT\u3084GPT\u306a\u3069\u306e\u4e8b\u524d\u5b66\u7fd2\u6e08\u307f\u30e2\u30c7\u30eb\u3092\u7c21\u5358\u306b\u5229\u7528\u3059\u308b\u305f\u3081\u306ePython\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3001spacy\u3068\u306e\u89aa\u548c\u6027\u3082\u9ad8\u304f\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>Transformers\u3092\u4f7f\u3048\u3070\u3001\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3092\u884c\u3044\u3001\u30bf\u30b9\u30af\u7279\u5316\u578b\u306e\u30e2\u30c7\u30eb\u3092\u4f5c\u6210\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001\u5546\u54c1\u30ec\u30d3\u30e5\u30fc\u306e\u611f\u60c5\u5206\u6790\u7528\u306bBERT\u3092\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3059\u308c\u3070\u3001\u3088\u308a\u9ad8\u7cbe\u5ea6\u306a\u5206\u6790\u304c\u53ef\u80fd\u306b\u306a\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001Transformers\u306epipelines\u3068\u3044\u3046\u6a5f\u80fd\u3092\u4f7f\u3048\u3070\u3001spacy\u306e\u8a00\u8a9e\u51e6\u7406\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306b\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u3092\u7d44\u307f\u8fbc\u3080\u3053\u3068\u3082\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306f\u3001\u56fa\u6709\u8868\u73fe\u62bd\u51fa\u306bBERT\u3092\u5229\u7528\u3059\u308b\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" 
data-enlighter-group=\"\">import spacy\nfrom transformers import pipeline\n\nnlp = spacy.load(\"ja_core_news_sm\")\nner_pipe = pipeline(\"ner\", model=\"\u4f7f\u3044\u305f\u3044\u30e2\u30c7\u30eb\u540d\", aggregation_strategy=\"simple\")\n\ndef bert_ner(doc):\n    ents = []\n    for ent in ner_pipe(doc.text):\n        start = doc.char_span(ent[\"start\"], ent[\"end\"])\n        if start is None:\n            continue\n        ents.append(spacy.tokens.Span(doc, start.i, start.i + 1, label=ent[\"entity\"]))\n    doc.ents = ents\n    return doc\n\nnlp.add_pipe(bert_ner)\n\ndoc = nlp(\"\u590f\u76ee\u6f31\u77f3\u306e\u4ee3\u8868\u4f5c\u3067\u3042\u308b\u574a\u3063\u3061\u3083\u3093\u306f\u3001\u660e\u6cbb\u6642\u4ee3\u3092\u821e\u53f0\u306b\u3057\u305f\u7269\u8a9e\u3067\u3059\u3002\")\nfor ent in doc.ents:\n    print(ent.text, ent.label_)<\/pre>\n\n\n\n<p>\u3053\u306e\u30b3\u30fc\u30c9\u3067\u306f\u3001spacy\u306e\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306bbert_ner\u95a2\u6570\u3092\u8ffd\u52a0\u3057\u3066\u3044\u307e\u3059\u3002\u3053\u306e\u95a2\u6570\u306f\u3001Transformers\u306ener 
pipeline\u3092\u4f7f\u3063\u3066\u56fa\u6709\u8868\u73fe\u3092\u62bd\u51fa\u3057\u3001\u305d\u306e\u7d50\u679c\u3092spacy\u306eSpan\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u5909\u63db\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u308c\u306b\u3088\u308a\u3001spacy\u306e\u901a\u5e38\u306e\u56fa\u6709\u8868\u73fe\u62bd\u51fa\u5668\u3068BERT\u30d9\u30fc\u30b9\u306e\u62bd\u51fa\u5668\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u3001\u3088\u308a\u9ad8\u5ea6\u306a\u62bd\u51fa\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306b\u3001Transformers\u30e9\u30a4\u30d6\u30e9\u30ea\u3068spacy\u3092\u3046\u307e\u304f\u9023\u643a\u3055\u305b\u308b\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u6069\u6075\u3092\u6700\u5927\u9650\u306b\u53d7\u3051\u308b\u3053\u3068\u304c\u3067\u304d\u308b\u306e\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u5206\u91ce\u3067\u306f\u3001\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u767b\u5834\u306b\u3088\u3063\u3066\u3001\u65b0\u3057\u3044\u53ef\u80fd\u6027\u304c\u6b21\u3005\u3068\u958b\u304b\u308c\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30e2\u30c7\u30eb\u3092spacy\u3068\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u3088\u308a\u9ad8\u5ea6\u3067\u5b9f\u7528\u7684\u306a\u8a00\u8a9e\u51e6\u7406\u30b7\u30b9\u30c6\u30e0\u3092\u69cb\u7bc9\u3067\u304d\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u662f\u975e\u3001\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u3068spacy\u3092\u7d44\u307f\u5408\u308f\u305b\u3066\u3001\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u65b0\u305f\u306a\u5730\u5e73\u3092\u5207\u308a\u958b\u3044\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-21\">\u307e\u3068\u3081 \u2013 
\u6700\u65b0\u306espacy\u3067\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u53ef\u80fd\u6027\u3092\u5e83\u3052\u3088\u3046<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-22\">spacy\u304c\u3082\u305f\u3089\u3059\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u6c11\u4e3b\u5316<\/h3>\n\n\n\n<p>\u672c\u8a18\u4e8b\u3067\u306f\u3001Python\u306e\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3042\u308bspacy\u306b\u3064\u3044\u3066\u3001\u305d\u306e\u6982\u8981\u304b\u3089\u5b9f\u8df5\u7684\u306a\u5fdc\u7528\u307e\u3067\u3001\u5e45\u5e83\u304f\u89e3\u8aac\u3057\u3066\u304d\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<p>spacy\u306f\u3001\u9ad8\u901f\u3067\u52b9\u7387\u7684\u306a\u8a00\u8a9e\u51e6\u7406\u3092\u5b9f\u73fe\u3059\u308b\u3060\u3051\u3067\u306a\u304f\u3001\u30b7\u30f3\u30d7\u30eb\u3067\u76f4\u611f\u7684\u306aAPI\u3092\u63d0\u4f9b\u3059\u308b\u3053\u3068\u3067\u3001\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u3092\u3088\u308a\u8eab\u8fd1\u306a\u3082\u306e\u306b\u3057\u3066\u3044\u307e\u3059\u3002\u5c02\u9580\u7684\u306a\u77e5\u8b58\u304c\u306a\u304f\u3066\u3082\u3001\u5c11\u306a\u3044\u30b3\u30fc\u30c9\u91cf\u3067\u9ad8\u5ea6\u306a\u8a00\u8a9e\u51e6\u7406\u3092\u5b9f\u88c5\u3067\u304d\u308b\u306e\u306f\u3001spacy\u306e\u5927\u304d\u306a\u9b45\u529b\u3068\u8a00\u3048\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001\u4e8b\u524d\u5b66\u7fd2\u6e08\u307f\u306e\u8a00\u8a9e\u30e2\u30c7\u30eb\u3092\u8c4a\u5bcc\u306b\u63d0\u4f9b\u3057\u3066\u3044\u308b\u3053\u3068\u3082\u3001spacy\u306e\u5f37\u307f\u306e\u4e00\u3064\u3067\u3059\u3002\u591a\u8a00\u8a9e\u5bfe\u5fdc\u3082\u9032\u3093\u3067\u304a\u308a\u3001\u65e5\u672c\u8a9e\u3092\u542b\u3080100\u4ee5\u4e0a\u306e\u8a00\u8a9e\u3067\u3001\u9ad8\u7cbe\u5ea6\u306a\u89e3\u6790\u304c\u53ef\u80fd\u306b\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u3046\u3057\u305fspacy\u306e\u7279\u9577\u306f\u3001\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u6c11\u4e3b\u5316\u306b\u5927\u304d\u304f\u8ca2\
u732e\u3057\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u307e\u3067\u3001\u5c02\u9580\u5bb6\u306e\u9818\u57df\u3060\u3063\u305f\u8a00\u8a9e\u51e6\u7406\u306e\u6280\u8853\u304c\u3001spacy\u3092\u901a\u3058\u3066\u591a\u304f\u306e\u958b\u767a\u8005\u3084\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30c6\u30a3\u30b9\u30c8\u306e\u624b\u306b\u5c4a\u304f\u3088\u3046\u306b\u306a\u3063\u305f\u306e\u3067\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-23\">\u5b9f\u52d9\u3084\u7814\u7a76\u3078\u306espacy\u306e\u6d3b\u7528\u30a2\u30a4\u30c7\u30a2<\/h3>\n\n\n\n<p>spacy\u306e\u6d3b\u7528\u6cd5\u306f\u3001\u5b9f\u306b\u591a\u5c90\u306b\u308f\u305f\u308a\u307e\u3059\u3002\u672c\u8a18\u4e8b\u3067\u7d39\u4ecb\u3057\u305f\u30c6\u30ad\u30b9\u30c8\u5206\u985e\u3084\u611f\u60c5\u5206\u6790\u3001\u30ad\u30fc\u30ef\u30fc\u30c9\u62bd\u51fa\u306a\u3069\u306f\u3001\u305d\u306e\u4e00\u90e8\u306b\u904e\u304e\u307e\u305b\u3093\u3002<\/p>\n\n\n\n<p>\u4f8b\u3048\u3070\u3001\u30cb\u30e5\u30fc\u30b9\u8a18\u4e8b\u306e\u81ea\u52d5\u30ab\u30c6\u30b4\u30ea\u5206\u985e\u3084\u3001SNS\u306e\u6295\u7a3f\u304b\u3089\u4f01\u696d\u30a4\u30e1\u30fc\u30b8\u3092\u5206\u6790\u3059\u308b\u306a\u3069\u3001\u30d3\u30b8\u30cd\u30b9\u306e\u73fe\u5834\u3067\u3082spacy\u306f\u5927\u3044\u306b\u529b\u3092\u767a\u63ee\u3059\u308b\u3067\u3057\u3087\u3046\u3002\u307e\u305f\u3001\u5b66\u8853\u7814\u7a76\u306e\u5206\u91ce\u3067\u3082\u3001\u6587\u732e\u306e\u8981\u7d04\u3084\u5f15\u7528\u95a2\u4fc2\u306e\u89e3\u6790\u306a\u3069\u3001spacy\u3092\u5fdc\u7528\u3067\u304d\u308b\u5834\u9762\u306f\u6570\u591a\u304f\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3055\u3089\u306b\u3001\u30c1\u30e3\u30c3\u30c8\u30dc\u30c3\u30c8\u3084\u97f3\u58f0\u30a2\u30b7\u30b9\u30bf\u30f3\u30c8\u306e\u958b\u767a\u306b\u3082spacy\u306f\u6d3b\u7528\u3067\u304d\u307e\u3059\u3002\u30e6\u30fc\u30b6\u30fc\u306e\u767a\u8a71\u3092\u89e3\u6790\u3057\u3001\u9069\u5207\u306a\u5fdc\u7b54\u3092\u751f\u6210\u3059\u308b\u969b\u306b\u3001spacy\u306e\u8a
00\u8a9e\u7406\u89e3\u306e\u6a5f\u80fd\u304c\u5927\u304d\u306a\u52a9\u3051\u306b\u306a\u308b\u306f\u305a\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u3088\u3046\u306b\u3001spacy\u306f\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u3042\u3089\u3086\u308b\u5834\u9762\u3067\u6d3b\u8e8d\u306e\u53ef\u80fd\u6027\u3092\u79d8\u3081\u3066\u3044\u307e\u3059\u3002\u307f\u306a\u3055\u3093\u3082\u3001spacy\u3092\u4f7f\u3063\u3066\u65ac\u65b0\u306a\u30a2\u30a4\u30c7\u30a2\u3092\u5b9f\u73fe\u3057\u3066\u307f\u3066\u306f\u3044\u304b\u304c\u3067\u3057\u3087\u3046\u304b\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-24\">spacy\u306e\u4eca\u5f8c\u306e\u767a\u5c55\u3078\u306e\u671f\u5f85<\/h3>\n\n\n\n<p>spacy\u306f\u73fe\u5728\u3082\u6d3b\u767a\u306b\u958b\u767a\u304c\u7d9a\u3051\u3089\u308c\u3066\u304a\u308a\u3001\u5e38\u306b\u65b0\u3057\u3044\u6a5f\u80fd\u304c\u8ffd\u52a0\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u7279\u306b\u3001\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\u3068\u306e\u9023\u643a\u306f\u3001\u4eca\u5f8c\u307e\u3059\u307e\u3059\u91cd\u8981\u306b\u306a\u3063\u3066\u304f\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>BERT\u3084GPT\u306a\u3069\u306e\u8a00\u8a9e\u30e2\u30c7\u30eb\u3092\u53d6\u308a\u8fbc\u3080\u3053\u3068\u3067\u3001spacy\u306e\u8a00\u8a9e\u7406\u89e3\u306e\u7cbe\u5ea6\u306f\u98db\u8e8d\u7684\u306b\u5411\u4e0a\u3057\u307e\u3059\u3002\u3053\u308c\u306b\u3088\u308a\u3001\u3088\u308a\u8907\u96d1\u3067\u9ad8\u5ea6\u306a\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30bf\u30b9\u30af\u3078\u306e\u6311\u6226\u304c\u53ef\u80fd\u306b\u306a\u308b\u306f\u305a\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001spacy\u306e\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u3082\u5e74\u3005\u62e1\u5927\u3057\u3066\u304a\u308a\u3001\u4e16\u754c\u4e2d\u306e\u958b\u767a\u8005\u304c\u30b9\u30ad\u30eb\u3084\u30ce\u30a6\u30cf\u30a6\u3092\u5171\u6709\u3057\u3066\u3044\u307e\u3059\u3002\u3053\u3046\u3057\u305f\u77e5\u898b\u306e\u84c4\u7a4d\u304c\u3001spacy\u306e\u3055\u3089\u306a\u308
b\u767a\u5c55\u3092\u5f8c\u62bc\u3057\u3057\u3066\u3044\u304f\u3053\u3068\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u5206\u91ce\u306f\u3001\u3081\u307e\u3050\u308b\u3057\u3044\u901f\u3055\u3067\u9032\u5316\u3092\u7d9a\u3051\u3066\u3044\u307e\u3059\u3002\u305d\u3093\u306a\u4e2d\u306b\u3042\u3063\u3066\u3001spacy\u306f\u958b\u767a\u8005\u306b\u5bc4\u308a\u6dfb\u3044\u3001\u8a00\u8a9e\u51e6\u7406\u306e\u6700\u524d\u7dda\u3092\u5207\u308a\u62d3\u304f\u30c4\u30fc\u30eb\u3067\u3042\u308a\u7d9a\u3051\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>spacy\u306e\u672a\u6765\u306b\u5927\u3044\u306b\u671f\u5f85\u3057\u3001\u672c\u8a18\u4e8b\u306e\u5185\u5bb9\u304c\u8aad\u8005\u306e\u307f\u306a\u3055\u3093\u306b\u3068\u3063\u3066\u3001\u6709\u76ca\u306a\u60c5\u5831\u3068\u306a\u308b\u3053\u3068\u3092\u9858\u3063\u3066\u3044\u307e\u3059\u3002\u305d\u308c\u3067\u306f\u3001spacy\u3092\u4f7f\u3063\u305f\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u4e16\u754c\u3092\u3001\u305c\u3072\u81ea\u5206\u306e\u624b\u3067\u4f53\u9a13\u3057\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002\u65b0\u305f\u306a\u767a\u898b\u3068\u53ef\u80fd\u6027\u304c\u3001\u304d\u3063\u3068\u305d\u3053\u306b\u3042\u308b\u306f\u305a\u3067\u3059\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30easpacy\u306f\u3001Python\u3067\u9ad8\u901f\u304b\u3064\u4f7f\u3044\u3084\u3059\u3044\u8a00\u8a9e\u51e6\u7406\u3092\u5b9f\u73fe\u3057\u307e\u3059\u3002\u672c\u8a18\u4e8b\u3067\u306f\u3001spacy\u306e\u6982\u8981\u304b\u3089\u5b9f\u8df5\u7684\u306a\u6d3b\u7528\u6cd5\u307e\u3067\u3001\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3092\u4ea4\u3048\u306a\u304c\u3089\u8a73\u3057\u304f\u89e3\u8aac\u3057\u307e\u3059\u3002\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\u306e\u53ef\u80fd\u6027\u3092\u5e83\u3052\u308bspa &#8230; 
<\/p>\n","protected":false},"author":1,"featured_media":481,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":{"0":"post-195","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","7":"category-python"},"_links":{"self":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts\/195","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=195"}],"version-history":[{"count":7,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts\/195\/revisions"}],"predecessor-version":[{"id":432,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/posts\/195\/revisions\/432"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=\/wp\/v2\/media\/481"}],"wp:attachment":[{"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=195"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=195"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/chocottopro.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=195"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}