web image collecting patch not working anymore


    Jan 04 2014 | 11:50 pm
    Hi,
    My old web image collecting patch isn't working anymore because of the new code and the way Google now handles/shows images on their website.
    My old patch is big and a mess; because of that, I am trying to get it working again by testing with a cleaner patch I found here in an old topic: "Web scraping google images" http://cycling74.com/forums/topic/web-scraping-google-images/
    I have done a lot of tests with different expressions in [jit.str.regexp], but with no luck at all.
    It has been a long time since I last used regular expressions.
    I know, I have to maintain the languages. :)
    Can somebody look into it, mainly at the expression in [jit.str.regexp]?
    I am testing with the Bing image search engine now.
    ----------begin_max5_patcher----------
    {
    "boxes" : [ {
    "box" : {
    "maxclass" : "comment",
    "text" : "<< test expr",
    "patching_rect" : [ 518.0, 316.0, 188.0, 20.0 ],
    "id" : "obj-3",
    "fontname" : "Arial",
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0,
    "presentation_rect" : [ 518.0, 318.0, 0.0, 0.0 ]
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "jit.str.fromsymbol",
    "patching_rect" : [ 498.0, 267.0, 109.0, 21.0 ],
    "outlettype" : [ "jit_matrix", "" ],
    "id" : "obj-16",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 2,
    "fontsize" : 13.0
    }
    }
    , {
    "box" : {
    "maxclass" : "comment",
    "text" : "http://www.bing.com/images/search?&q=object&qft=+filterui:imagesize-medium",
    "patching_rect" : [ 212.0, 59.0, 476.0, 20.0 ],
    "id" : "obj-7",
    "fontname" : "Arial",
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "jit.str.regexp @re "patching_rect" : [ 703.0, 321.0, 285.0, 20.0 ],
    "outlettype" : [ "jit_matrix", "jit_matrix", "jit_matrix", "jit_matrix", "" ],
    "id" : "obj-8",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 5,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "message",
    "text" : "download http://www.bing.com/images/search?&q=object&qft=+filterui:imagesize-medium matrix",
    "patching_rect" : [ 295.0, 164.0, 578.0, 18.0 ],
    "outlettype" : [ "" ],
    "id" : "obj-2",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 1,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "jit.str.regexp @re imgurl=([^&]+)&",
    "patching_rect" : [ 718.0, 296.0, 188.0, 20.0 ],
    "outlettype" : [ "jit_matrix", "jit_matrix", "jit_matrix", "jit_matrix", "" ],
    "id" : "obj-1",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 5,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "comment",
    "text" : "<< after populating collection i start the random (urn)",
    "patching_rect" : [ 193.0, 267.0, 289.0, 20.0 ],
    "id" : "obj-130",
    "fontname" : "Arial",
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "t b l",
    "patching_rect" : [ 157.0, 267.0, 32.5, 20.0 ],
    "outlettype" : [ "bang", "" ],
    "id" : "obj-128",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 2,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "message",
    "text" : "object",
    "patching_rect" : [ 156.0, 59.0, 43.0, 18.0 ],
    "outlettype" : [ "" ],
    "id" : "obj-114",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 1,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "button",
    "patching_rect" : [ 337.0, 421.0, 50.0, 50.0 ],
    "outlettype" : [ "bang" ],
    "id" : "obj-91",
    "numinlets" : 1,
    "numoutlets" : 1
    }
    }
    , {
    "box" : {
    "maxclass" : "comment",
    "text" : "<< urn is nicer",
    "patching_rect" : [ 415.0, 477.0, 87.0, 20.0 ],
    "id" : "obj-89",
    "fontname" : "Arial",
    "hidden" : 1,
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "urn",
    "patching_rect" : [ 337.0, 478.0, 62.5, 20.0 ],
    "outlettype" : [ "int", "bang" ],
    "id" : "obj-88",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 2,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "comment",
    "text" : "<< here we get the number of strings\\nfeed it to random object",
    "linecount" : 2,
    "patching_rect" : [ 458.0, 350.0, 210.0, 33.0 ],
    "id" : "obj-87",
    "fontname" : "Arial",
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "route match",
    "patching_rect" : [ 381.0, 350.0, 74.0, 20.0 ],
    "outlettype" : [ "", "" ],
    "id" : "obj-82",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 2,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "comment",
    "text" : "<< this is the real meat, thnx OP !",
    "patching_rect" : [ 916.0, 296.0, 188.0, 20.0 ],
    "id" : "obj-78",
    "fontname" : "Arial",
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "print error",
    "patching_rect" : [ 212.0, 242.0, 63.0, 20.0 ],
    "id" : "obj-76",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "sel 1",
    "patching_rect" : [ 195.0, 211.0, 36.0, 20.0 ],
    "outlettype" : [ "bang", "" ],
    "id" : "obj-75",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 2,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "comment",
    "text" : "<< cheak d/l went ok",
    "patching_rect" : [ 231.0, 188.0, 121.0, 20.0 ],
    "id" : "obj-74",
    "fontname" : "Arial",
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "message",
    "text" : "$1",
    "patching_rect" : [ 195.0, 189.0, 32.5, 18.0 ],
    "outlettype" : [ "" ],
    "id" : "obj-71",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 1,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "route download",
    "patching_rect" : [ 195.0, 164.0, 93.0, 20.0 ],
    "outlettype" : [ "", "" ],
    "id" : "obj-69",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 2,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "comment",
    "text" : "learn the value\\n of value :)\\nhere we use it to incament index",
    "linecount" : 4,
    "patching_rect" : [ 99.0, 457.0, 102.0, 60.0 ],
    "id" : "obj-68",
    "fontname" : "Arial",
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "t b i i",
    "patching_rect" : [ 211.0, 475.0, 46.0, 20.0 ],
    "outlettype" : [ "bang", "int", "int" ],
    "id" : "obj-65",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 3,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "v index",
    "patching_rect" : [ 211.0, 451.0, 49.0, 20.0 ],
    "outlettype" : [ "" ],
    "id" : "obj-59",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 1,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "+ 1",
    "patching_rect" : [ 211.0, 500.0, 32.5, 20.0 ],
    "outlettype" : [ "int" ],
    "id" : "obj-61",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 1,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "pack 0 s",
    "patching_rect" : [ 238.0, 536.0, 71.0, 20.0 ],
    "outlettype" : [ "" ],
    "id" : "obj-39",
    "fontname" : "Arial",
    "numinlets" : 2,
    "numoutlets" : 1,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "comment",
    "text" : "",
    "linecount" : 2,
    "patching_rect" : [ 91.0, 404.0, 114.0, 33.0 ],
    "id" : "obj-4",
    "fontname" : "Arial",
    "frgb" : 0.0,
    "numinlets" : 1,
    "numoutlets" : 0,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "t s b",
    "patching_rect" : [ 156.0, 89.0, 427.0, 20.0 ],
    "outlettype" : [ "", "bang" ],
    "id" : "obj-20",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 2,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "jit.str.tosymbol",
    "patching_rect" : [ 281.0, 350.0, 87.0, 20.0 ],
    "outlettype" : [ "", "" ],
    "id" : "obj-28",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 2,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "jit.str.regexp @re imgurl\\\\\\\\:\\\\\\\\"\\\\\\\\\\\\;[\\\\\\\\w|/|:|.]*?\\\\\\\\.(?:jpg|gif|png)",
    "patching_rect" : [ 170.5, 316.0, 331.0, 20.0 ],
    "outlettype" : [ "jit_matrix", "jit_matrix", "jit_matrix", "jit_matrix", "" ],
    "id" : "obj-27",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 5,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "jit.textfile",
    "patching_rect" : [ 156.0, 242.0, 57.0, 20.0 ],
    "outlettype" : [ "jit_matrix", "jit_matrix", "" ],
    "id" : "obj-26",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 3,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "jit.uldl",
    "patching_rect" : [ 156.0, 142.0, 59.0, 20.0 ],
    "outlettype" : [ "jit_matrix", "" ],
    "id" : "obj-25",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 2,
    "fontsize" : 12.0
    }
    }
    , {
    "box" : {
    "maxclass" : "newobj",
    "text" : "sprintf download http://www.bing.com/images/search?&q=%s&qft=+filterui:imagesize-medium matrix",
    "patching_rect" : [ 156.0, 120.0, 542.0, 20.0 ],
    "outlettype" : [ "" ],
    "id" : "obj-24",
    "fontname" : "Arial",
    "numinlets" : 1,
    "numoutlets" : 1,
    "fontsize" : 12.0
    }
    }
    ],
    "lines" : [ {
    "patchline" : {
    "source" : [ "obj-91", 0 ],
    "destination" : [ "obj-88", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-88", 0 ],
    "destination" : [ "obj-5", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-82", 0 ],
    "destination" : [ "obj-88", 1 ],
    "hidden" : 0,
    "midpoints" : [ 390.5, 387.5, 390.0, 387.5 ],
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-75", 1 ],
    "destination" : [ "obj-76", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-75", 0 ],
    "destination" : [ "obj-26", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-71", 0 ],
    "destination" : [ "obj-75", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-69", 0 ],
    "destination" : [ "obj-71", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-65", 0 ],
    "destination" : [ "obj-61", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-65", 1 ],
    "destination" : [ "obj-61", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-65", 2 ],
    "destination" : [ "obj-39", 0 ],
    "hidden" : 0,
    "midpoints" : [ 247.5, 531.0, 247.5, 531.0 ],
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-61", 0 ],
    "destination" : [ "obj-59", 0 ],
    "hidden" : 0,
    "midpoints" : [ 220.5, 526.0, 205.0, 526.0, 205.0, 446.0, 220.5, 446.0 ],
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-59", 0 ],
    "destination" : [ "obj-65", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-39", 0 ],
    "destination" : [ "obj-5", 0 ],
    "hidden" : 0,
    "midpoints" : [ 247.5, 536.5, 342.5, 536.5 ],
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-30", 0 ],
    "destination" : [ "obj-32", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-30", 1 ],
    "destination" : [ "obj-31", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-28", 0 ],
    "destination" : [ "obj-10", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-27", 4 ],
    "destination" : [ "obj-82", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-27", 2 ],
    "destination" : [ "obj-28", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-26", 0 ],
    "destination" : [ "obj-128", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-25", 1 ],
    "destination" : [ "obj-69", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-25", 0 ],
    "destination" : [ "obj-26", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-24", 0 ],
    "destination" : [ "obj-25", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-24", 0 ],
    "destination" : [ "obj-2", 1 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-20", 1 ],
    "destination" : [ "obj-30", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-20", 0 ],
    "destination" : [ "obj-24", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-16", 0 ],
    "destination" : [ "obj-27", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-128", 0 ],
    "destination" : [ "obj-91", 0 ],
    "hidden" : 0,
    "midpoints" : [ 166.5, 398.5, 346.5, 398.5 ],
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-128", 1 ],
    "destination" : [ "obj-27", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-114", 0 ],
    "destination" : [ "obj-20", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-10", 0 ],
    "destination" : [ "obj-59", 0 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    , {
    "patchline" : {
    "source" : [ "obj-10", 1 ],
    "destination" : [ "obj-39", 1 ],
    "hidden" : 0,
    "disabled" : 0
    }
    }
    ],
    "appversion" : {
    "major" : 6,
    "minor" : 1,
    "revision" : 6,
    "architecture" : "x86"
    }
    }
    -----------end_max5_patcher-----------

    • Aug 17 2015 | 7:09 pm
      Hi,
      did you (or anyone) have any luck getting the original patch to work again?
      (the one pasted here is broken)
      Best
    • Aug 17 2015 | 10:19 pm
      Haven't tested extensively, but something like this should work (just changed the regexp):
    • Aug 18 2015 | 8:41 am
      I haven't looked into that since my post, but maybe it will work by changing the regexp.
      With Bing search engine my installation has run for one month without problems.
      But I think you will need more to run it with Google.
      I will try to look into it again on one of the coming evenings.