From 95959a480a6265880a80c75f7cbc4eb24b3467fa Mon Sep 17 00:00:00 2001 From: chris Date: Mon, 7 Jan 2019 21:52:43 +0800 Subject: [PATCH] fix --- Pipfile | 4 +- Pipfile.lock | 222 +++++------------- .../json_requests.py | 4 +- .../spider_requests.py | 10 +- spider_scrapy/spider_scrapy/items.py | 3 +- spider_scrapy/spider_scrapy/pipelines.py | 28 ++- spider_scrapy/spider_scrapy/settings.py | 3 +- .../spider_scrapy/spiders/duitang.py | 31 +-- 8 files changed, 109 insertions(+), 196 deletions(-) rename json_requests.py => spider_requests/json_requests.py (96%) rename spider_requests.py => spider_requests/spider_requests.py (96%) diff --git a/Pipfile b/Pipfile index 4bb10ab..724a4df 100644 --- a/Pipfile +++ b/Pipfile @@ -6,15 +6,13 @@ verify_ssl = true [dev-packages] pylint = "*" autopep8 = "*" -pandas = "*" -lxml = "*" [packages] requests = "*" scrapy = "*" aiohttp = "*" aiofiles = "*" -beautifulsoup4 = "*" +pillow = "*" [requires] python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock index 6376759..1cf5f5d 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "ef9b7c063a03c4ff885df382809dfd8ef6709c19642f44690b48a2158972fe1f" + "sha256": "f0cb820eb373d12f4e6fed5c7be9e12777caac8f3de117471de8c3e3a2699d6d" }, "pipfile-spec": 6, "requires": { @@ -80,15 +80,6 @@ ], "version": "==0.7.0" }, - "beautifulsoup4": { - "hashes": [ - "sha256:1ed70a0e99742653953d68462378a1a8eb65dca5f7c8fa44a05a2a0b3545df67", - "sha256:6a7f5e0efc563cd1ffeefba6d528b97aa0d313c02dd126ba6c455e5fe5bd48eb", - "sha256:e394827904cc4923f443e8dd2e9968343669c8e1ad7a8d62d7541e780884acb8" - ], - "index": "aliyun", - "version": "==4.7.0" - }, "certifi": { "hashes": [ "sha256:47f9c83ef4c0c621eaef743f133f09fa8a74a9b75f037e8624f83bd1b6626cb7", @@ -201,38 +192,34 @@ }, "lxml": { "hashes": [ - "sha256:02bc220d61f46e9b9d5a53c361ef95e9f5e1d27171cd461dddb17677ae2289a5", - "sha256:22f253b542a342755f6cfc047fe4d3a296515cf9b542bc6e261af45a80b8caf6", - "sha256:2f31145c7ff665b330919bfa44aacd3a0211a76ca7e7b441039d2a0b0451e415", - "sha256:36720698c29e7a9626a0dc802ef8885f8f0239bfd1689628ecd459a061f2807f", - "sha256:438a1b0203545521f6616132bfe0f4bca86f8a401364008b30e2b26ec408ce85", - "sha256:4815892904c336bbaf73dafd54f45f69f4021c22b5bad7332176bbf4fb830568", - "sha256:5be031b0f15ad63910d8e5038b489d95a79929513b3634ad4babf77100602588", - "sha256:5c93ae37c3c588e829b037fdfbd64a6e40c901d3f93f7beed6d724c44829a3ad", - "sha256:60842230678674cdac4a1cf0f707ef12d75b9a4fc4a565add4f710b5fcf185d5", - "sha256:62939a8bb6758d1bf923aa1c13f0bcfa9bf5b2fc0f5fa917a6e25db5fe0cfa4e", - "sha256:75830c06a62fe7b8fe3bbb5f269f0b308f19f3949ac81cfd40062f47c1455faf", - "sha256:81992565b74332c7c1aff6a913a3e906771aa81c9d0c68c68113cffcae45bc53", - "sha256:8c892fb0ee52c594d9a7751c7d7356056a9682674b92cc1c4dc968ff0f30c52f", - "sha256:9d862e3cf4fc1f2837dedce9c42269c8c76d027e49820a548ac89fdcee1e361f", - "sha256:a623965c086a6e91bb703d4da62dabe59fe88888e82c4117d544e11fd74835d6", - "sha256:a7783ab7f6a508b0510490cef9f857b763d796ba7476d9703f89722928d1e113", - "sha256:aab09fbe8abfa3b9ce62aaf45aca2d28726b1b9ee44871dbe644050a2fff4940", - "sha256:abf181934ac3ef193832fb973fd7f6149b5c531903c2ec0f1220941d73eee601", - "sha256:ae07fa0c115733fce1e9da96a3ac3fa24801742ca17e917e0c79d63a01eeb843", - "sha256:b9c78242219f674ab645ec571c9a95d70f381319a23911941cd2358a8e0521cf", - "sha256:bccb267678b870d9782c3b44d0cefe3ba0e329f9af8c946d32bf3778e7a4f271", - "sha256:c4df4d27f4c93b2cef74579f00b1d3a31a929c7d8023f870c4b476f03a274db4", - "sha256:caf0e50b546bb60dfa99bb18dfa6748458a83131ecdceaf5c071d74907e7e78a", - "sha256:d3266bd3ac59ac4edcd5fa75165dee80b94a3e5c91049df5f7c057ccf097551c", - "sha256:db0d213987bcd4e6d41710fb4532b22315b0d8fb439ff901782234456556aed1", - "sha256:dbbd5cf7690a40a9f0a9325ab480d0fccf46d16b378eefc08e195d84299bfae1", - "sha256:e16e07a0ec3a75b5ee61f2b1003c35696738f937dc8148fbda9fe2147ccb6e61", - "sha256:e175a006725c7faadbe69e791877d09936c0ef2cf49d01b60a6c1efcb0e8be6f", - "sha256:edd9c13a97f6550f9da2236126bb51c092b3b1ce6187f2bd966533ad794bbb5e", - "sha256:fa39ea60d527fbdd94215b5e5552f1c6a912624521093f1384a491a8ad89ad8b" - ], - "version": "==4.2.5" + "sha256:0dd6589fa75d369ba06d2b5f38dae107f76ea127f212f6a7bee134f6df2d1d21", + "sha256:1afbac344aa68c29e81ab56c1a9411c3663157b5aee5065b7fa030b398d4f7e0", + "sha256:1baad9d073692421ad5dbbd81430aba6c7f5fdc347f03537ae046ddf2c9b2297", + "sha256:1d8736421a2358becd3edf20260e41a06a0bf08a560480d3a5734a6bcbacf591", + "sha256:1e1d9bddc5afaddf0de76246d3f2152f961697ad7439c559f179002682c45801", + "sha256:1f179dc8b2643715f020f4d119d5529b02cd794c1c8f305868b73b8674d2a03f", + "sha256:241fb7bdf97cb1df1edfa8f0bcdfd80525d4023dac4523a241907c8b2f44e541", + "sha256:2f9765ee5acd3dbdcdc0d0c79309e01f7c16bc8d39b49250bf88de7b46daaf58", + "sha256:312e1e1b1c3ce0c67e0b8105317323e12807955e8186872affb667dbd67971f6", + "sha256:3273db1a8055ca70257fd3691c6d2c216544e1a70b673543e15cc077d8e9c730", + "sha256:34dfaa8c02891f9a246b17a732ca3e99c5e42802416628e740a5d1cb2f50ff49", + "sha256:3aa3f5288af349a0f3a96448ebf2e57e17332d99f4f30b02093b7948bd9f94cc", + "sha256:51102e160b9d83c1cc435162d90b8e3c8c93b28d18d87b60c56522d332d26879", + "sha256:56115fc2e2a4140e8994eb9585119a1ae9223b506826089a3ba753a62bd194a6", + "sha256:69d83de14dbe8fe51dccfd36f88bf0b40f5debeac763edf9f8325180190eba6e", + "sha256:99fdce94aeaa3ccbdfcb1e23b34273605c5853aa92ec23d84c84765178662c6c", + "sha256:a7c0cd5b8a20f3093ee4a67374ccb3b8a126743b15a4d759e2a1bf098faac2b2", + "sha256:abe12886554634ed95416a46701a917784cb2b4c77bfacac6916681d49bbf83d", + "sha256:b4f67b5183bd5f9bafaeb76ad119e977ba570d2b0e61202f534ac9b5c33b4485", + "sha256:bdd7c1658475cc1b867b36d5c4ed4bc316be8d3368abe03d348ba906a1f83b0e", + "sha256:c6f24149a19f611a415a51b9bc5f17b6c2f698e0d6b41ffb3fa9f24d35d05d73", + "sha256:d1e111b3ab98613115a208c1017f266478b0ab224a67bc8eac670fa0bad7d488", + "sha256:d6520aa965773bbab6cb7a791d5895b00d02cf9adc93ac2bf4edb9ac1a6addc5", + "sha256:dd185cde2ccad7b649593b0cda72021bc8a91667417001dbaf24cd746ecb7c11", + "sha256:de2e5b0828a9d285f909b5d2e9d43f1cf6cf21fe65bc7660bdaa1780c7b58298", + "sha256:f726444b8e909c4f41b4fde416e1071cf28fa84634bfb4befdf400933b6463af" + ], + "version": "==4.3.0" }, "multidict": { "hashes": [ @@ -275,6 +262,42 @@ ], "version": "==1.5.1" }, + "pillow": { + "hashes": [ + "sha256:051de330a06c99d6f84bcf582960487835bcae3fc99365185dc2d4f65a390c0e", + "sha256:0ae5289948c5e0a16574750021bd8be921c27d4e3527800dc9c2c1d2abc81bf7", + "sha256:0b1efce03619cdbf8bcc61cfae81fcda59249a469f31c6735ea59badd4a6f58a", + "sha256:163136e09bd1d6c6c6026b0a662976e86c58b932b964f255ff384ecc8c3cefa3", + "sha256:18e912a6ccddf28defa196bd2021fe33600cbe5da1aa2f2e2c6df15f720b73d1", + "sha256:24ec3dea52339a610d34401d2d53d0fb3c7fd08e34b20c95d2ad3973193591f1", + "sha256:267f8e4c0a1d7e36e97c6a604f5b03ef58e2b81c1becb4fccecddcb37e063cc7", + "sha256:3273a28734175feebbe4d0a4cde04d4ed20f620b9b506d26f44379d3c72304e1", + "sha256:4c678e23006798fc8b6f4cef2eaad267d53ff4c1779bd1af8725cc11b72a63f3", + "sha256:4d4bc2e6bb6861103ea4655d6b6f67af8e5336e7216e20fff3e18ffa95d7a055", + "sha256:505738076350a337c1740a31646e1de09a164c62c07db3b996abdc0f9d2e50cf", + "sha256:5233664eadfa342c639b9b9977190d64ad7aca4edc51a966394d7e08e7f38a9f", + "sha256:5d95cb9f6cced2628f3e4de7e795e98b2659dfcc7176ab4a01a8b48c2c2f488f", + "sha256:7eda4c737637af74bac4b23aa82ea6fbb19002552be85f0b89bc27e3a762d239", + "sha256:801ddaa69659b36abf4694fed5aa9f61d1ecf2daaa6c92541bbbbb775d97b9fe", + "sha256:825aa6d222ce2c2b90d34a0ea31914e141a85edefc07e17342f1d2fdf121c07c", + "sha256:9c215442ff8249d41ff58700e91ef61d74f47dfd431a50253e1a1ca9436b0697", + "sha256:a3d90022f2202bbb14da991f26ca7a30b7e4c62bf0f8bf9825603b22d7e87494", + "sha256:a631fd36a9823638fe700d9225f9698fb59d049c942d322d4c09544dc2115356", + "sha256:a6523a23a205be0fe664b6b8747a5c86d55da960d9586db039eec9f5c269c0e6", + "sha256:a756ecf9f4b9b3ed49a680a649af45a8767ad038de39e6c030919c2f443eb000", + "sha256:b117287a5bdc81f1bac891187275ec7e829e961b8032c9e5ff38b70fd036c78f", + "sha256:ba04f57d1715ca5ff74bb7f8a818bf929a204b3b3c2c2826d1e1cc3b1c13398c", + "sha256:cd878195166723f30865e05d87cbaf9421614501a4bd48792c5ed28f90fd36ca", + "sha256:cee815cc62d136e96cf76771b9d3eb58e0777ec18ea50de5cfcede8a7c429aa8", + "sha256:d1722b7aa4b40cf93ac3c80d3edd48bf93b9208241d166a14ad8e7a20ee1d4f3", + "sha256:d7c1c06246b05529f9984435fc4fa5a545ea26606e7f450bdbe00c153f5aeaad", + "sha256:e9c8066249c040efdda84793a2a669076f92a301ceabe69202446abb4c5c5ef9", + "sha256:f227d7e574d050ff3996049e086e1f18c7bd2d067ef24131e50a1d3fe5831fbc", + "sha256:fc9a12aad714af36cf3ad0275a96a733526571e52710319855628f476dcb144e" + ], + "index": "aliyun", + "version": "==5.4.1" + }, "pyasn1": { "hashes": [ "sha256:da2420fe13a9452d8ae97a0e478adde1dee153b11ba832a95b223a2ba01c10f7", @@ -353,13 +376,6 @@ ], "version": "==1.12.0" }, - "soupsieve": { - "hashes": [ - "sha256:057e08f362a255b457a5781675211556799ed3bb8807506eaac3809390bc304b", - "sha256:f7d99b41637be2f249dfcc06ae93c13fcbbdfa7bb68b15308cdd0734e58146f1" - ], - "version": "==1.6.1" - }, "twisted": { "hashes": [ "sha256:294be2c6bf84ae776df2fc98e7af7d6537e1c5e60a46d33c3ce2a197677da395" @@ -488,41 +504,6 @@ ], "version": "==1.3.1" }, - "lxml": { - "hashes": [ - "sha256:02bc220d61f46e9b9d5a53c361ef95e9f5e1d27171cd461dddb17677ae2289a5", - "sha256:22f253b542a342755f6cfc047fe4d3a296515cf9b542bc6e261af45a80b8caf6", - "sha256:2f31145c7ff665b330919bfa44aacd3a0211a76ca7e7b441039d2a0b0451e415", - "sha256:36720698c29e7a9626a0dc802ef8885f8f0239bfd1689628ecd459a061f2807f", - "sha256:438a1b0203545521f6616132bfe0f4bca86f8a401364008b30e2b26ec408ce85", - "sha256:4815892904c336bbaf73dafd54f45f69f4021c22b5bad7332176bbf4fb830568", - "sha256:5be031b0f15ad63910d8e5038b489d95a79929513b3634ad4babf77100602588", - "sha256:5c93ae37c3c588e829b037fdfbd64a6e40c901d3f93f7beed6d724c44829a3ad", - "sha256:60842230678674cdac4a1cf0f707ef12d75b9a4fc4a565add4f710b5fcf185d5", - "sha256:62939a8bb6758d1bf923aa1c13f0bcfa9bf5b2fc0f5fa917a6e25db5fe0cfa4e", - "sha256:75830c06a62fe7b8fe3bbb5f269f0b308f19f3949ac81cfd40062f47c1455faf", - "sha256:81992565b74332c7c1aff6a913a3e906771aa81c9d0c68c68113cffcae45bc53", - "sha256:8c892fb0ee52c594d9a7751c7d7356056a9682674b92cc1c4dc968ff0f30c52f", - "sha256:9d862e3cf4fc1f2837dedce9c42269c8c76d027e49820a548ac89fdcee1e361f", - "sha256:a623965c086a6e91bb703d4da62dabe59fe88888e82c4117d544e11fd74835d6", - "sha256:a7783ab7f6a508b0510490cef9f857b763d796ba7476d9703f89722928d1e113", - "sha256:aab09fbe8abfa3b9ce62aaf45aca2d28726b1b9ee44871dbe644050a2fff4940", - "sha256:abf181934ac3ef193832fb973fd7f6149b5c531903c2ec0f1220941d73eee601", - "sha256:ae07fa0c115733fce1e9da96a3ac3fa24801742ca17e917e0c79d63a01eeb843", - "sha256:b9c78242219f674ab645ec571c9a95d70f381319a23911941cd2358a8e0521cf", - "sha256:bccb267678b870d9782c3b44d0cefe3ba0e329f9af8c946d32bf3778e7a4f271", - "sha256:c4df4d27f4c93b2cef74579f00b1d3a31a929c7d8023f870c4b476f03a274db4", - "sha256:caf0e50b546bb60dfa99bb18dfa6748458a83131ecdceaf5c071d74907e7e78a", - "sha256:d3266bd3ac59ac4edcd5fa75165dee80b94a3e5c91049df5f7c057ccf097551c", - "sha256:db0d213987bcd4e6d41710fb4532b22315b0d8fb439ff901782234456556aed1", - "sha256:dbbd5cf7690a40a9f0a9325ab480d0fccf46d16b378eefc08e195d84299bfae1", - "sha256:e16e07a0ec3a75b5ee61f2b1003c35696738f937dc8148fbda9fe2147ccb6e61", - "sha256:e175a006725c7faadbe69e791877d09936c0ef2cf49d01b60a6c1efcb0e8be6f", - "sha256:edd9c13a97f6550f9da2236126bb51c092b3b1ce6187f2bd966533ad794bbb5e", - "sha256:fa39ea60d527fbdd94215b5e5552f1c6a912624521093f1384a491a8ad89ad8b" - ], - "version": "==4.2.5" - }, "mccabe": { "hashes": [ "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", @@ -530,65 +511,6 @@ ], "version": "==0.6.1" }, - "numpy": { - "hashes": [ - "sha256:0df89ca13c25eaa1621a3f09af4c8ba20da849692dcae184cb55e80952c453fb", - "sha256:154c35f195fd3e1fad2569930ca51907057ae35e03938f89a8aedae91dd1b7c7", - "sha256:18e84323cdb8de3325e741a7a8dd4a82db74fde363dce32b625324c7b32aa6d7", - "sha256:1e8956c37fc138d65ded2d96ab3949bd49038cc6e8a4494b1515b0ba88c91565", - "sha256:23557bdbca3ccbde3abaa12a6e82299bc92d2b9139011f8c16ca1bb8c75d1e95", - "sha256:24fd645a5e5d224aa6e39d93e4a722fafa9160154f296fd5ef9580191c755053", - "sha256:36e36b6868e4440760d4b9b44587ea1dc1f06532858d10abba98e851e154ca70", - "sha256:3d734559db35aa3697dadcea492a423118c5c55d176da2f3be9c98d4803fc2a7", - "sha256:416a2070acf3a2b5d586f9a6507bb97e33574df5bd7508ea970bbf4fc563fa52", - "sha256:4a22dc3f5221a644dfe4a63bf990052cc674ef12a157b1056969079985c92816", - "sha256:4d8d3e5aa6087490912c14a3c10fbdd380b40b421c13920ff468163bc50e016f", - "sha256:4f41fd159fba1245e1958a99d349df49c616b133636e0cf668f169bce2aeac2d", - "sha256:561ef098c50f91fbac2cc9305b68c915e9eb915a74d9038ecf8af274d748f76f", - "sha256:56994e14b386b5c0a9b875a76d22d707b315fa037affc7819cda08b6d0489756", - "sha256:73a1f2a529604c50c262179fcca59c87a05ff4614fe8a15c186934d84d09d9a5", - "sha256:7da99445fd890206bfcc7419f79871ba8e73d9d9e6b82fe09980bc5bb4efc35f", - "sha256:99d59e0bcadac4aa3280616591fb7bcd560e2218f5e31d5223a2e12a1425d495", - "sha256:a4cc09489843c70b22e8373ca3dfa52b3fab778b57cf81462f1203b0852e95e3", - "sha256:a61dc29cfca9831a03442a21d4b5fd77e3067beca4b5f81f1a89a04a71cf93fa", - "sha256:b1853df739b32fa913cc59ad9137caa9cc3d97ff871e2bbd89c2a2a1d4a69451", - "sha256:b1f44c335532c0581b77491b7715a871d0dd72e97487ac0f57337ccf3ab3469b", - "sha256:b261e0cb0d6faa8fd6863af26d30351fd2ffdb15b82e51e81e96b9e9e2e7ba16", - "sha256:c857ae5dba375ea26a6228f98c195fec0898a0fd91bcf0e8a0cae6d9faf3eca7", - "sha256:cf5bb4a7d53a71bb6a0144d31df784a973b36d8687d615ef6a7e9b1809917a9b", - "sha256:db9814ff0457b46f2e1d494c1efa4111ca089e08c8b983635ebffb9c1573361f", - "sha256:df04f4bad8a359daa2ff74f8108ea051670cafbca533bb2636c58b16e962989e", - "sha256:ecf81720934a0e18526177e645cbd6a8a21bb0ddc887ff9738de07a1df5c6b61", - "sha256:edfa6fba9157e0e3be0f40168eb142511012683ac3dc82420bee4a3f3981b30e" - ], - "version": "==1.15.4" - }, - "pandas": { - "hashes": [ - "sha256:11975fad9edbdb55f1a560d96f91830e83e29bed6ad5ebf506abda09818eaf60", - "sha256:12e13d127ca1b585dd6f6840d3fe3fa6e46c36a6afe2dbc5cb0b57032c902e31", - "sha256:1c87fcb201e1e06f66e23a61a5fea9eeebfe7204a66d99df24600e3f05168051", - "sha256:242e9900de758e137304ad4b5663c2eff0d798c2c3b891250bd0bd97144579da", - "sha256:26c903d0ae1542890cb9abadb4adcb18f356b14c2df46e4ff657ae640e3ac9e7", - "sha256:2e1e88f9d3e5f107b65b59cd29f141995597b035d17cc5537e58142038942e1a", - "sha256:31b7a48b344c14691a8e92765d4023f88902ba3e96e2e4d0364d3453cdfd50db", - "sha256:4fd07a932b4352f8a8973761ab4e84f965bf81cc750fb38e04f01088ab901cb8", - "sha256:5b24ca47acf69222e82530e89111dd9d14f9b970ab2cd3a1c2c78f0c4fbba4f4", - "sha256:647b3b916cc8f6aeba240c8171be3ab799c3c1b2ea179a3be0bd2712c4237553", - "sha256:66b060946046ca27c0e03e9bec9bba3e0b918bafff84c425ca2cc2e157ce121e", - "sha256:6efa9fa6e1434141df8872d0fa4226fc301b17aacf37429193f9d70b426ea28f", - "sha256:be4715c9d8367e51dbe6bc6d05e205b1ae234f0dc5465931014aa1c4af44c1ba", - "sha256:bea90da782d8e945fccfc958585210d23de374fa9294a9481ed2abcef637ebfc", - "sha256:d318d77ab96f66a59e792a481e2701fba879e1a453aefeebdb17444fe204d1ed", - "sha256:d785fc08d6f4207437e900ffead930a61e634c5e4f980ba6d3dc03c9581748c7", - "sha256:de9559287c4fe8da56e8c3878d2374abc19d1ba2b807bfa7553e912a8e5ba87c", - "sha256:f4f98b190bb918ac0bc0e3dd2ab74ff3573da9f43106f6dba6385406912ec00f", - "sha256:f71f1a7e2d03758f6e957896ed696254e2bc83110ddbc6942018f1a232dd9dad", - "sha256:fb944c8f0b0ab5c1f7846c686bc4cdf8cde7224655c12edcd59d5212cd57bec0" - ], - "index": "aliyun", - "version": "==0.23.4" - }, "pycodestyle": { "hashes": [ "sha256:cbc619d09254895b0d12c2c691e237b2e91e9b2ecf5e84c26b35400f93dcfb83", @@ -604,20 +526,6 @@ "index": "aliyun", "version": "==2.2.2" }, - "python-dateutil": { - "hashes": [ - "sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93", - "sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02" - ], - "version": "==2.7.5" - }, - "pytz": { - "hashes": [ - "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca", - "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6" - ], - "version": "==2018.7" - }, "six": { "hashes": [ "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", diff --git a/json_requests.py b/spider_requests/json_requests.py similarity index 96% rename from json_requests.py rename to spider_requests/json_requests.py index 19f25f6..4cd1dc1 100644 --- a/json_requests.py +++ b/spider_requests/json_requests.py @@ -39,9 +39,7 @@ def test(self, response): data = result.get('data') if data: object_list = data.get('object_list') - if not object_list: - return False - else: + if object_list: return True def write_into_file(self, response): diff --git a/spider_requests.py b/spider_requests/spider_requests.py similarity index 96% rename from spider_requests.py rename to spider_requests/spider_requests.py index 08bb5dd..fba4525 100644 --- a/spider_requests.py +++ b/spider_requests/spider_requests.py @@ -40,9 +40,7 @@ def test(self, response): data = result.get('data') if data: object_list = data.get('object_list') - if not object_list: - return None - else: + if object_list: for i in object_list: items = {} photo = i.get('photo') @@ -114,9 +112,9 @@ def write_into_file(self, format, response): def main(): - # print('Enter the keyowrd: ', end='') - # kw = input() - kw = 'correct' + print('Enter the keyowrd: ', end='') + kw = input() + # kw = 'correct' start_time = time.time() counter = 0 for start in range(0, 3600, 24): diff --git a/spider_scrapy/spider_scrapy/items.py b/spider_scrapy/spider_scrapy/items.py index 9d44e09..b833bcd 100644 --- a/spider_scrapy/spider_scrapy/items.py +++ b/spider_scrapy/spider_scrapy/items.py @@ -10,5 +10,4 @@ class SpiderScrapyItem(scrapy.Item): # define the fields for your item here like: - # name = scrapy.Field() - result = scrapy.Field() + path = scrapy.Field() diff --git a/spider_scrapy/spider_scrapy/pipelines.py b/spider_scrapy/spider_scrapy/pipelines.py index 68ef558..7c73094 100644 --- a/spider_scrapy/spider_scrapy/pipelines.py +++ b/spider_scrapy/spider_scrapy/pipelines.py @@ -5,11 +5,29 @@ # Don't forget to add your pipeline to the ITEM_PIPELINES setting # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html +from hashlib import md5 -class SpiderScrapyPipeline(object): +import scrapy +from scrapy.pipelines.images import ImagesPipeline + + +class SpiderScrapyPipeline: def __init__(self): - self.file = open('data.json', 'w', encoding='utf-8') + pass + + +class ImagePipeline(ImagesPipeline): + def file_path(self, request, item, response=None, info=None): + if 'gif' in item['path']: + filename = '{1}.{2}'.format( + md5(response.content).hexdigest(), 'gif') + elif 'png' in item['path']: + filename = '{1}.{2}'.format( + md5(response.content).hexdigest(), 'png') + elif 'jpg' or 'jpeg' in item['path']: + filename = '{1}.{2}'.format( + md5(response.content).hexdigest(), 'jpg') + return filename - def process_item(self, item, spider): - self.file.write(item.get('result')) - return item + def get_media_requests(self, item, info): + yield scrapy.Request(item['path']) diff --git a/spider_scrapy/spider_scrapy/settings.py b/spider_scrapy/spider_scrapy/settings.py index 97ddab1..df1ba60 100644 --- a/spider_scrapy/spider_scrapy/settings.py +++ b/spider_scrapy/spider_scrapy/settings.py @@ -65,8 +65,9 @@ # Configure item pipelines # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html ITEM_PIPELINES = { - 'spider_scrapy.pipelines.SpiderScrapyPipeline': 300, + 'spider_scrapy.pipelines.ImagePipeline': 300, } +IMAGES_STORE = './dist' # Enable and configure the AutoThrottle extension (disabled by default) # See https://doc.scrapy.org/en/latest/topics/autothrottle.html diff --git a/spider_scrapy/spider_scrapy/spiders/duitang.py b/spider_scrapy/spider_scrapy/spiders/duitang.py index af2f1fc..4246cc0 100644 --- a/spider_scrapy/spider_scrapy/spiders/duitang.py +++ b/spider_scrapy/spider_scrapy/spiders/duitang.py @@ -6,9 +6,6 @@ import time from spider_scrapy.items import SpiderScrapyItem -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DIST_DIR = os.path.join(BASE_DIR, 'dist') - class DuitangSpider(scrapy.Spider): name = 'duitang' @@ -17,7 +14,7 @@ class DuitangSpider(scrapy.Spider): # start_urls = [] def start_requests(self): - for start in range(0, 1200, 24): + for start in range(0, 360, 24): url = 'https://www.duitang.com/napi/blog/list/by_search/?kw={0}&type=feed&include_fields=top_comments%2Cis_root%2Csource_link%2Citem%2Cbuyable%2Croot_id%2Cstatus%2Clike_count%2Clike_id%2Csender%2Calbum%2Creply_count%2Cfavorite_blog_id&_type=&start={1}'.format( self.kw, start) @@ -31,18 +28,14 @@ def parse(self, response): if data: object_list = data.get('object_list') if object_list: - result = json.dumps(json.loads(response.text), - indent=4, ensure_ascii=False) - result_dir = os.path.join( - os.path.join(DIST_DIR, 'json'), self.kw) - page = response.url.split("=")[-1] - if not os.path.exists(result_dir): - os.makedirs(result_dir) - result_path = os.path.join( - result_dir, '{0}.json'.format(int(page) // 24 + 1)) - item['result'] = result - return item - - else: - pass - + for i in object_list: + item = SpiderScrapyItem() + photo = i.get('photo') + if photo: + path = photo.get('path') + if path: + if 'gif_jpeg' in path: + item['path'] = path[:-5] + else: + item['path'] = path + yield item